From 88dfe89b3e3b18f75d15db85585769e1890f68c3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 9 Feb 2020 13:27:34 +0100 Subject: [PATCH] 4.19-stable patches added patches: aio-prevent-potential-eventfd-recursion-on-poll.patch arm-tegra-enable-pllp-bypass-during-tegra124-lp1.patch bcache-add-readahead-cache-policy-options-via-sysfs-interface.patch btrfs-fix-missing-hole-after-hole-punching-and-fsync-when-using-no_holes.patch btrfs-fix-race-between-adding-and-putting-tree-mod-seq-elements-and-nodes.patch btrfs-set-trans-drity-in-btrfs_commit_transaction.patch drm-rect-avoid-division-by-zero.patch eventfd-track-eventfd_signal-recursion-depth.patch gfs2-fix-o_sync-write-handling.patch gfs2-move-setting-current-backing_dev_info.patch iwlwifi-don-t-throw-error-when-trying-to-remove-igtk.patch jbd2_seq_info_next-should-increase-position-index.patch kvm-ppc-book3s-hv-uninit-vcpu-if-vcore-creation-fails.patch kvm-ppc-book3s-pr-free-shared-page-if-mmu-initialization-fails.patch kvm-x86-fix-potential-put_fpu-w-o-load_fpu-on-mpx-platform.patch kvm-x86-protect-dr-based-index-computations-from-spectre-v1-l1tf-attacks.patch kvm-x86-protect-ioapic_read_indirect-from-spectre-v1-l1tf-attacks.patch kvm-x86-protect-ioapic_write_indirect-from-spectre-v1-l1tf-attacks.patch kvm-x86-protect-kvm_hv_msr__crash_data-from-spectre-v1-l1tf-attacks.patch kvm-x86-protect-kvm_lapic_reg_write-from-spectre-v1-l1tf-attacks.patch kvm-x86-protect-msr-based-index-computations-from-spectre-v1-l1tf-attacks-in-x86.c.patch kvm-x86-protect-msr-based-index-computations-in-fixed_msr_to_seg_unit-from-spectre-v1-l1tf-attacks.patch kvm-x86-protect-msr-based-index-computations-in-pmu.h-from-spectre-v1-l1tf-attacks.patch kvm-x86-protect-pmu_intel.c-from-spectre-v1-l1tf-attacks.patch kvm-x86-protect-x86_decode_insn-from-spectre-v1-l1tf-attacks.patch kvm-x86-refactor-picdev_write-to-prevent-spectre-v1-l1tf-attacks.patch kvm-x86-refactor-prefix-decoding-to-prevent-spectre-v1-l1tf-attacks.patch media-rc-ensure-lirc-is-initialized-before-registering-input-device.patch mwifiex-fix-unbalanced-locking-in-mwifiex_process_country_ie.patch nfs-directory-page-cache-pages-need-to-be-locked-when-read.patch nfs-fix-memory-leaks-and-corruption-in-readdir.patch scsi-qla2xxx-fix-unbound-nvme-response-length.patch sunrpc-expiry_time-should-be-seconds-not-timeval.patch tools-kvm_stat-fix-kvm_exit-filter-name.patch watchdog-fix-uaf-in-reboot-notifier-handling-in-watchdog-core-code.patch xen-balloon-support-xend-based-toolstack-take-two.patch --- ...-potential-eventfd-recursion-on-poll.patch | 70 ++ ...able-pllp-bypass-during-tegra124-lp1.patch | 70 ++ ...e-policy-options-via-sysfs-interface.patch | 139 ++++ ...nching-and-fsync-when-using-no_holes.patch | 693 ++++++++++++++++++ ...ting-tree-mod-seq-elements-and-nodes.patch | 237 ++++++ ...ns-drity-in-btrfs_commit_transaction.patch | 96 +++ .../drm-rect-avoid-division-by-zero.patch | 47 ++ ...track-eventfd_signal-recursion-depth.patch | 102 +++ .../gfs2-fix-o_sync-write-handling.patch | 111 +++ ...ove-setting-current-backing_dev_info.patch | 80 ++ ...row-error-when-trying-to-remove-igtk.patch | 56 ++ ..._next-should-increase-position-index.patch | 39 + ...-uninit-vcpu-if-vcore-creation-fails.patch | 44 ++ ...red-page-if-mmu-initialization-fails.patch | 41 ++ ...put_fpu-w-o-load_fpu-on-mpx-platform.patch | 55 ++ ...tations-from-spectre-v1-l1tf-attacks.patch | 57 ++ ...ndirect-from-spectre-v1-l1tf-attacks.patch | 58 ++ ...ndirect-from-spectre-v1-l1tf-attacks.patch | 40 + 
...sh_data-from-spectre-v1-l1tf-attacks.patch | 59 ++ ...g_write-from-spectre-v1-l1tf-attacks.patch | 54 ++ ...rom-spectre-v1-l1tf-attacks-in-x86.c.patch | 54 ++ ...eg_unit-from-spectre-v1-l1tf-attacks.patch | 47 ++ ...n-pmu.h-from-spectre-v1-l1tf-attacks.patch | 69 ++ ...intel.c-from-spectre-v1-l1tf-attacks.patch | 76 ++ ...de_insn-from-spectre-v1-l1tf-attacks.patch | 48 ++ ...e-to-prevent-spectre-v1-l1tf-attacks.patch | 45 ++ ...g-to-prevent-spectre-v1-l1tf-attacks.patch | 57 ++ ...ized-before-registering-input-device.patch | 145 ++++ ...ocking-in-mwifiex_process_country_ie.patch | 35 + ...he-pages-need-to-be-locked-when-read.patch | 112 +++ ...mory-leaks-and-corruption-in-readdir.patch | 81 ++ ...xxx-fix-unbound-nvme-response-length.patch | 78 ++ queue-4.19/series | 36 + ...y_time-should-be-seconds-not-timeval.patch | 54 ++ ...ls-kvm_stat-fix-kvm_exit-filter-name.patch | 73 ++ ...ifier-handling-in-watchdog-core-code.patch | 197 +++++ ...upport-xend-based-toolstack-take-two.patch | 47 ++ 37 files changed, 3402 insertions(+) create mode 100644 queue-4.19/aio-prevent-potential-eventfd-recursion-on-poll.patch create mode 100644 queue-4.19/arm-tegra-enable-pllp-bypass-during-tegra124-lp1.patch create mode 100644 queue-4.19/bcache-add-readahead-cache-policy-options-via-sysfs-interface.patch create mode 100644 queue-4.19/btrfs-fix-missing-hole-after-hole-punching-and-fsync-when-using-no_holes.patch create mode 100644 queue-4.19/btrfs-fix-race-between-adding-and-putting-tree-mod-seq-elements-and-nodes.patch create mode 100644 queue-4.19/btrfs-set-trans-drity-in-btrfs_commit_transaction.patch create mode 100644 queue-4.19/drm-rect-avoid-division-by-zero.patch create mode 100644 queue-4.19/eventfd-track-eventfd_signal-recursion-depth.patch create mode 100644 queue-4.19/gfs2-fix-o_sync-write-handling.patch create mode 100644 queue-4.19/gfs2-move-setting-current-backing_dev_info.patch create mode 100644 queue-4.19/iwlwifi-don-t-throw-error-when-trying-to-remove-igtk.patch create mode 100644 queue-4.19/jbd2_seq_info_next-should-increase-position-index.patch create mode 100644 queue-4.19/kvm-ppc-book3s-hv-uninit-vcpu-if-vcore-creation-fails.patch create mode 100644 queue-4.19/kvm-ppc-book3s-pr-free-shared-page-if-mmu-initialization-fails.patch create mode 100644 queue-4.19/kvm-x86-fix-potential-put_fpu-w-o-load_fpu-on-mpx-platform.patch create mode 100644 queue-4.19/kvm-x86-protect-dr-based-index-computations-from-spectre-v1-l1tf-attacks.patch create mode 100644 queue-4.19/kvm-x86-protect-ioapic_read_indirect-from-spectre-v1-l1tf-attacks.patch create mode 100644 queue-4.19/kvm-x86-protect-ioapic_write_indirect-from-spectre-v1-l1tf-attacks.patch create mode 100644 queue-4.19/kvm-x86-protect-kvm_hv_msr__crash_data-from-spectre-v1-l1tf-attacks.patch create mode 100644 queue-4.19/kvm-x86-protect-kvm_lapic_reg_write-from-spectre-v1-l1tf-attacks.patch create mode 100644 queue-4.19/kvm-x86-protect-msr-based-index-computations-from-spectre-v1-l1tf-attacks-in-x86.c.patch create mode 100644 queue-4.19/kvm-x86-protect-msr-based-index-computations-in-fixed_msr_to_seg_unit-from-spectre-v1-l1tf-attacks.patch create mode 100644 queue-4.19/kvm-x86-protect-msr-based-index-computations-in-pmu.h-from-spectre-v1-l1tf-attacks.patch create mode 100644 queue-4.19/kvm-x86-protect-pmu_intel.c-from-spectre-v1-l1tf-attacks.patch create mode 100644 queue-4.19/kvm-x86-protect-x86_decode_insn-from-spectre-v1-l1tf-attacks.patch create mode 100644 queue-4.19/kvm-x86-refactor-picdev_write-to-prevent-spectre-v1-l1tf-attacks.patch 
create mode 100644 queue-4.19/kvm-x86-refactor-prefix-decoding-to-prevent-spectre-v1-l1tf-attacks.patch create mode 100644 queue-4.19/media-rc-ensure-lirc-is-initialized-before-registering-input-device.patch create mode 100644 queue-4.19/mwifiex-fix-unbalanced-locking-in-mwifiex_process_country_ie.patch create mode 100644 queue-4.19/nfs-directory-page-cache-pages-need-to-be-locked-when-read.patch create mode 100644 queue-4.19/nfs-fix-memory-leaks-and-corruption-in-readdir.patch create mode 100644 queue-4.19/scsi-qla2xxx-fix-unbound-nvme-response-length.patch create mode 100644 queue-4.19/sunrpc-expiry_time-should-be-seconds-not-timeval.patch create mode 100644 queue-4.19/tools-kvm_stat-fix-kvm_exit-filter-name.patch create mode 100644 queue-4.19/watchdog-fix-uaf-in-reboot-notifier-handling-in-watchdog-core-code.patch create mode 100644 queue-4.19/xen-balloon-support-xend-based-toolstack-take-two.patch diff --git a/queue-4.19/aio-prevent-potential-eventfd-recursion-on-poll.patch b/queue-4.19/aio-prevent-potential-eventfd-recursion-on-poll.patch new file mode 100644 index 00000000000..b8e742e0fa4 --- /dev/null +++ b/queue-4.19/aio-prevent-potential-eventfd-recursion-on-poll.patch @@ -0,0 +1,70 @@ +From 01d7a356872eec22ef34a33a5f9cfa917d145468 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Mon, 3 Feb 2020 10:33:42 -0700 +Subject: aio: prevent potential eventfd recursion on poll + +From: Jens Axboe + +commit 01d7a356872eec22ef34a33a5f9cfa917d145468 upstream. + +If we have nested or circular eventfd wakeups, then we can deadlock if +we run them inline from our poll waitqueue wakeup handler. It's also +possible to have very long chains of notifications, to the extent where +we could risk blowing the stack. + +Check the eventfd recursion count before calling eventfd_signal(). If +it's non-zero, then punt the signaling to async context. This is always +safe, as it takes us out-of-line in terms of stack and locking context. + +Cc: stable@vger.kernel.org # 4.19+ +Reviewed-by: Jeff Moyer +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + fs/aio.c | 20 ++++++++++++++++++-- + 1 file changed, 18 insertions(+), 2 deletions(-) + +--- a/fs/aio.c ++++ b/fs/aio.c +@@ -1600,6 +1600,14 @@ static int aio_fsync(struct fsync_iocb * + return 0; + } + ++static void aio_poll_put_work(struct work_struct *work) ++{ ++ struct poll_iocb *req = container_of(work, struct poll_iocb, work); ++ struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll); ++ ++ iocb_put(iocb); ++} ++ + static void aio_poll_complete_work(struct work_struct *work) + { + struct poll_iocb *req = container_of(work, struct poll_iocb, work); +@@ -1664,6 +1672,8 @@ static int aio_poll_wake(struct wait_que + list_del_init(&req->wait.entry); + + if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { ++ struct kioctx *ctx = iocb->ki_ctx; ++ + /* + * Try to complete the iocb inline if we can. Use + * irqsave/irqrestore because not all filesystems (e.g. 
fuse) +@@ -1673,8 +1683,14 @@ static int aio_poll_wake(struct wait_que + list_del(&iocb->ki_list); + iocb->ki_res.res = mangle_poll(mask); + req->done = true; +- spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags); +- iocb_put(iocb); ++ if (iocb->ki_eventfd && eventfd_signal_count()) { ++ iocb = NULL; ++ INIT_WORK(&req->work, aio_poll_put_work); ++ schedule_work(&req->work); ++ } ++ spin_unlock_irqrestore(&ctx->ctx_lock, flags); ++ if (iocb) ++ iocb_put(iocb); + } else { + schedule_work(&req->work); + } diff --git a/queue-4.19/arm-tegra-enable-pllp-bypass-during-tegra124-lp1.patch b/queue-4.19/arm-tegra-enable-pllp-bypass-during-tegra124-lp1.patch new file mode 100644 index 00000000000..4b48d3a3580 --- /dev/null +++ b/queue-4.19/arm-tegra-enable-pllp-bypass-during-tegra124-lp1.patch @@ -0,0 +1,70 @@ +From 1a3388d506bf5b45bb283e6a4c4706cfb4897333 Mon Sep 17 00:00:00 2001 +From: Stephen Warren +Date: Thu, 3 Oct 2019 14:50:31 -0600 +Subject: ARM: tegra: Enable PLLP bypass during Tegra124 LP1 + +From: Stephen Warren + +commit 1a3388d506bf5b45bb283e6a4c4706cfb4897333 upstream. + +For a little over a year, U-Boot has configured the flow controller to +perform automatic RAM re-repair on off->on power transitions of the CPU +rail[1]. This is mandatory for correct operation of Tegra124. However, +RAM re-repair relies on certain clocks, which the kernel must enable and +leave running. PLLP is one of those clocks. This clock is shut down +during LP1 in order to save power. Enable bypass (which I believe routes +osc_div_clk, essentially the crystal clock, to the PLL output) so that +this clock signal toggles even though the PLL is not active. This is +required so that LP1 power mode (system suspend) operates correctly. + +The bypass configuration must then be undone when resuming from LP1, so +that all peripheral clocks run at the expected rate. Without this, many +peripherals won't work correctly; for example, the UART baud rate would +be incorrect. + +NVIDIA's downstream kernel code only does this if not compiled for +Tegra30, so the added code is made conditional upon the chip ID. +NVIDIA's downstream code makes this change conditional upon the active +CPU cluster. The upstream kernel currently doesn't support cluster +switching, so this patch doesn't test the active CPU cluster ID. 
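(As a rough C sketch of what the assembly change below does: only the bypass bit, bit 31, and the enable bit, bit 30, are taken from the patch; the function names and the MMIO pointer are illustrative stand-ins, not the real Tegra clock code.)

	#include <stdint.h>

	#define PLLP_BYPASS (1u << 31)	/* route the crystal clock to the PLL output */
	#define PLLP_ENABLE (1u << 30)

	/* Entering LP1: raise bypass before shutting the PLL down so the
	 * clock output keeps toggling for RAM re-repair while PLLP is off. */
	static void pllp_enter_lp1(volatile uint32_t *pllp_base)
	{
		*pllp_base |= PLLP_BYPASS;
		*pllp_base &= ~PLLP_ENABLE;
	}

	/* Resuming: re-enable the PLL, wait for it to lock, then drop the
	 * bypass so peripherals (e.g. UART baud rates) run at expected rates. */
	static void pllp_exit_lp1(volatile uint32_t *pllp_base)
	{
		*pllp_base |= PLLP_ENABLE;
		/* ... poll the PLL lock bit here ... */
		*pllp_base &= ~PLLP_BYPASS;
	}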
+ [1] 3cc7942a4ae5 ARM: tegra: implement RAM repair + +Reported-by: Jonathan Hunter +Cc: stable@vger.kernel.org +Signed-off-by: Stephen Warren +Signed-off-by: Thierry Reding +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/mach-tegra/sleep-tegra30.S | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/arch/arm/mach-tegra/sleep-tegra30.S ++++ b/arch/arm/mach-tegra/sleep-tegra30.S +@@ -382,6 +382,14 @@ _pll_m_c_x_done: + pll_locked r1, r0, CLK_RESET_PLLC_BASE + pll_locked r1, r0, CLK_RESET_PLLX_BASE + ++ tegra_get_soc_id TEGRA_APB_MISC_BASE, r1 ++ cmp r1, #TEGRA30 ++ beq 1f ++ ldr r1, [r0, #CLK_RESET_PLLP_BASE] ++ bic r1, r1, #(1<<31) @ disable PllP bypass ++ str r1, [r0, #CLK_RESET_PLLP_BASE] ++1: ++ + mov32 r7, TEGRA_TMRUS_BASE + ldr r1, [r7] + add r1, r1, #LOCK_DELAY +@@ -641,7 +649,10 @@ tegra30_switch_cpu_to_clk32k: + str r0, [r4, #PMC_PLLP_WB0_OVERRIDE] + + /* disable PLLP, PLLA, PLLC and PLLX */ ++ tegra_get_soc_id TEGRA_APB_MISC_BASE, r1 ++ cmp r1, #TEGRA30 + ldr r0, [r5, #CLK_RESET_PLLP_BASE] ++ orrne r0, r0, #(1 << 31) @ enable PllP bypass on fast cluster + bic r0, r0, #(1 << 30) + str r0, [r5, #CLK_RESET_PLLP_BASE] + ldr r0, [r5, #CLK_RESET_PLLA_BASE] diff --git a/queue-4.19/bcache-add-readahead-cache-policy-options-via-sysfs-interface.patch b/queue-4.19/bcache-add-readahead-cache-policy-options-via-sysfs-interface.patch new file mode 100644 index 00000000000..b3b364fe452 --- /dev/null +++ b/queue-4.19/bcache-add-readahead-cache-policy-options-via-sysfs-interface.patch @@ -0,0 +1,139 @@ +From 038ba8cc1bffc51250add4a9b9249d4331576d8f Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Sat, 1 Feb 2020 22:42:33 +0800 +Subject: bcache: add readahead cache policy options via sysfs interface + +From: Coly Li + +commit 038ba8cc1bffc51250add4a9b9249d4331576d8f upstream. + +In year 2007, high performance SSDs were still expensive, so in order to +save more space for real workload or meta data, the readahead I/Os +for non-meta data were bypassed and not cached on SSD. + +Nowadays, SSD prices have dropped a lot and people can find larger +SSDs at a more comfortable price. It is unnecessary to always bypass +normal readahead I/Os to save SSD space now. + +This patch adds options for readahead data cache policies via the sysfs +file /sys/block/bcache/readahead_cache_policy; the options are: +- "all": cache all readahead data I/Os. +- "meta-only": only cache meta data, and bypass other regular I/Os. + +If users want to make bcache continue to only cache readahead requests +for metadata and bypass regular data readahead, please set "meta-only" +to this sysfs file. By default, bcache will now go back to caching all +readahead requests.
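(A minimal userspace sketch of selecting the new policy; the "bcache0" device name is an assumption, substitute the actual bcache block device on the system.)

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		const char *path =
			"/sys/block/bcache0/bcache/readahead_cache_policy";
		const char *policy = "meta-only";	/* or "all" (the default) */
		int fd = open(path, O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* writing the policy name switches the cache behaviour */
		if (write(fd, policy, strlen(policy)) != (ssize_t)strlen(policy))
			perror("write");
		close(fd);
		return 0;
	}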
+ +Cc: stable@vger.kernel.org +Signed-off-by: Coly Li +Acked-by: Eric Wheeler +Cc: Michael Lyle +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/bcache/bcache.h | 3 +++ + drivers/md/bcache/request.c | 17 ++++++++++++----- + drivers/md/bcache/sysfs.c | 22 ++++++++++++++++++++++ + 3 files changed, 37 insertions(+), 5 deletions(-) + +--- a/drivers/md/bcache/bcache.h ++++ b/drivers/md/bcache/bcache.h +@@ -329,6 +329,9 @@ struct cached_dev { + */ + atomic_t has_dirty; + ++#define BCH_CACHE_READA_ALL 0 ++#define BCH_CACHE_READA_META_ONLY 1 ++ unsigned int cache_readahead_policy; + struct bch_ratelimit writeback_rate; + struct delayed_work writeback_rate_update; + +--- a/drivers/md/bcache/request.c ++++ b/drivers/md/bcache/request.c +@@ -391,13 +391,20 @@ static bool check_should_bypass(struct c + goto skip; + + /* +- * Flag for bypass if the IO is for read-ahead or background, +- * unless the read-ahead request is for metadata ++ * If the bio is for read-ahead or background IO, bypass it or ++ * not depends on the following situations, ++ * - If the IO is for meta data, always cache it and no bypass ++ * - If the IO is not meta data, check dc->cache_reada_policy, ++ * BCH_CACHE_READA_ALL: cache it and not bypass ++ * BCH_CACHE_READA_META_ONLY: not cache it and bypass ++ * That is, read-ahead request for metadata always get cached + * (eg, for gfs2 or xfs). + */ +- if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) && +- !(bio->bi_opf & (REQ_META|REQ_PRIO))) +- goto skip; ++ if ((bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND))) { ++ if (!(bio->bi_opf & (REQ_META|REQ_PRIO)) && ++ (dc->cache_readahead_policy != BCH_CACHE_READA_ALL)) ++ goto skip; ++ } + + if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) || + bio_sectors(bio) & (c->sb.block_size - 1)) { +--- a/drivers/md/bcache/sysfs.c ++++ b/drivers/md/bcache/sysfs.c +@@ -25,6 +25,12 @@ static const char * const bch_cache_mode + NULL + }; + ++static const char * const bch_reada_cache_policies[] = { ++ "all", ++ "meta-only", ++ NULL ++}; ++ + /* Default is -1; we skip past it for stop_when_cache_set_failed */ + static const char * const bch_stop_on_failure_modes[] = { + "auto", +@@ -94,6 +100,7 @@ rw_attribute(congested_write_threshold_u + rw_attribute(sequential_cutoff); + rw_attribute(data_csum); + rw_attribute(cache_mode); ++rw_attribute(readahead_cache_policy); + rw_attribute(stop_when_cache_set_failed); + rw_attribute(writeback_metadata); + rw_attribute(writeback_running); +@@ -160,6 +167,11 @@ SHOW(__bch_cached_dev) + bch_cache_modes, + BDEV_CACHE_MODE(&dc->sb)); + ++ if (attr == &sysfs_readahead_cache_policy) ++ return bch_snprint_string_list(buf, PAGE_SIZE, ++ bch_reada_cache_policies, ++ dc->cache_readahead_policy); ++ + if (attr == &sysfs_stop_when_cache_set_failed) + return bch_snprint_string_list(buf, PAGE_SIZE, + bch_stop_on_failure_modes, +@@ -324,6 +336,15 @@ STORE(__cached_dev) + } + } + ++ if (attr == &sysfs_readahead_cache_policy) { ++ v = __sysfs_match_string(bch_reada_cache_policies, -1, buf); ++ if (v < 0) ++ return v; ++ ++ if ((unsigned int) v != dc->cache_readahead_policy) ++ dc->cache_readahead_policy = v; ++ } ++ + if (attr == &sysfs_stop_when_cache_set_failed) { + v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf); + if (v < 0) +@@ -417,6 +438,7 @@ static struct attribute *bch_cached_dev_ + &sysfs_data_csum, + #endif + &sysfs_cache_mode, ++ &sysfs_readahead_cache_policy, + &sysfs_stop_when_cache_set_failed, + &sysfs_writeback_metadata, + &sysfs_writeback_running, diff --git 
a/queue-4.19/btrfs-fix-missing-hole-after-hole-punching-and-fsync-when-using-no_holes.patch b/queue-4.19/btrfs-fix-missing-hole-after-hole-punching-and-fsync-when-using-no_holes.patch new file mode 100644 index 00000000000..172b38f2582 --- /dev/null +++ b/queue-4.19/btrfs-fix-missing-hole-after-hole-punching-and-fsync-when-using-no_holes.patch @@ -0,0 +1,693 @@ +From 0e56315ca147b3e60c7bf240233a301d3c7fb508 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Tue, 19 Nov 2019 12:07:33 +0000 +Subject: Btrfs: fix missing hole after hole punching and fsync when using NO_HOLES + +From: Filipe Manana + +commit 0e56315ca147b3e60c7bf240233a301d3c7fb508 upstream. + +When using the NO_HOLES feature, if we punch a hole into a file and then +fsync it, there are cases where a subsequent fsync will miss the fact that +a hole was punched, resulting in the holes not existing after replaying +the log tree. + +Essentially these cases all imply that tree-log.c:copy_items() is not +invoked for the leafs that delimit holes, because nothing changed those +leafs in the current transaction. And it's precisely copy_items() where +we currently detect and log holes, which works as long as the holes are +between file extent items in the input leaf, or between the beginning of the +input leaf and the previous leaf, or between the last item in the leaf +and the next leaf. + +First example where we miss a hole: + + *) The extent items of the inode span multiple leafs; + + *) The punched hole covers a range that affects only the extent items of + the first leaf; + + *) The fsync operation is done in full mode (BTRFS_INODE_NEEDS_FULL_SYNC + is set in the inode's runtime flags). + + That results in the hole not existing after replaying the log tree. + + For example, if the fs/subvolume tree has the following layout for a + particular inode: + + Leaf N, generation 10: + + [ ... INODE_ITEM INODE_REF EXTENT_ITEM (0 64K) EXTENT_ITEM (64K 128K) ] + + Leaf N + 1, generation 10: + + [ EXTENT_ITEM (128K 64K) ... ] + + If at transaction 11 we punch a hole covering the range [0, 128K[, we end + up dropping the two extent items from leaf N, but we don't touch the other + leaf, so we end up in the following state: + + Leaf N, generation 11: + + [ ... INODE_ITEM INODE_REF ] + + Leaf N + 1, generation 10: + + [ EXTENT_ITEM (128K 64K) ... ] + + A full fsync after punching the hole will only process leaf N because it + was modified in the current transaction, but not leaf N + 1, since it + was not modified in the current transaction (generation 10 and not 11). + As a result the fsync will not log any holes, because it didn't process + any leaf with extent items. + +Second example where we will miss a hole: + + *) An inode has its items spanning 5 (or more) leafs; + + *) A hole is punched and it covers only the extent items of the 3rd + leaf. This results in deleting the entire leaf and not touching any + of the other leafs. + + So the only leaf that is modified in the current transaction, when + punching the hole, is the first leaf, which contains the inode item. + During the full fsync, the only leaf that is passed to copy_items() + is that first leaf, and that's not enough for the hole detection + code in copy_items() to determine there's a hole between the last + file extent item in the 2nd leaf and the first file extent item in + the 3rd leaf (which was the 4th leaf before punching the hole).
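(The fix described below boils down to the following walk, shown here as a self-contained sketch: iterate the inode's extents in offset order and emit a hole wherever the previous extent's end is below the next offset, plus a trailing hole up to i_size. Plain arrays stand in for btree leaves, and inline-extent sector alignment is ignored.)

	#include <stdint.h>
	#include <stdio.h>

	struct extent { uint64_t offset, len; };	/* sorted, non-overlapping */

	static void log_holes(const struct extent *ext, int n, uint64_t i_size)
	{
		uint64_t prev_end = 0;

		for (int i = 0; i < n; i++) {
			if (prev_end < ext[i].offset)	/* gap => implicit hole */
				printf("hole [%llu, %llu)\n",
				       (unsigned long long)prev_end,
				       (unsigned long long)ext[i].offset);
			prev_end = ext[i].offset + ext[i].len;
		}
		if (prev_end < i_size)		/* trailing hole up to i_size */
			printf("hole [%llu, %llu)\n",
			       (unsigned long long)prev_end,
			       (unsigned long long)i_size);
	}

	int main(void)
	{
		/* The first example above after punching [0, 128K): only the
		 * extent at 128K survives, so [0, 128K) must be logged. */
		struct extent ext[] = { { 128 << 10, 64 << 10 } };

		log_holes(ext, 1, 192 << 10);
		return 0;
	}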
+ +Fix this by scanning all leafs and punch holes as necessary when doing a +full fsync (less common than a non-full fsync) when the NO_HOLES feature +is enabled. The lack of explicit file extent items to mark holes makes it +necessary to scan existing extents to determine if holes exist. + +A test case for fstests follows soon. + +Fixes: 16e7549f045d33 ("Btrfs: incompatible format change to remove hole extents") +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/tree-log.c | 388 +++++++++++++--------------------------------------- + 1 file changed, 100 insertions(+), 288 deletions(-) + +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -3892,7 +3892,7 @@ static int log_csums(struct btrfs_trans_ + static noinline int copy_items(struct btrfs_trans_handle *trans, + struct btrfs_inode *inode, + struct btrfs_path *dst_path, +- struct btrfs_path *src_path, u64 *last_extent, ++ struct btrfs_path *src_path, + int start_slot, int nr, int inode_only, + u64 logged_isize) + { +@@ -3903,7 +3903,6 @@ static noinline int copy_items(struct bt + struct btrfs_file_extent_item *extent; + struct btrfs_inode_item *inode_item; + struct extent_buffer *src = src_path->nodes[0]; +- struct btrfs_key first_key, last_key, key; + int ret; + struct btrfs_key *ins_keys; + u32 *ins_sizes; +@@ -3911,9 +3910,6 @@ static noinline int copy_items(struct bt + int i; + struct list_head ordered_sums; + int skip_csum = inode->flags & BTRFS_INODE_NODATASUM; +- bool has_extents = false; +- bool need_find_last_extent = true; +- bool done = false; + + INIT_LIST_HEAD(&ordered_sums); + +@@ -3922,8 +3918,6 @@ static noinline int copy_items(struct bt + if (!ins_data) + return -ENOMEM; + +- first_key.objectid = (u64)-1; +- + ins_sizes = (u32 *)ins_data; + ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); + +@@ -3944,9 +3938,6 @@ static noinline int copy_items(struct bt + + src_offset = btrfs_item_ptr_offset(src, start_slot + i); + +- if (i == nr - 1) +- last_key = ins_keys[i]; +- + if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { + inode_item = btrfs_item_ptr(dst_path->nodes[0], + dst_path->slots[0], +@@ -3960,20 +3951,6 @@ static noinline int copy_items(struct bt + src_offset, ins_sizes[i]); + } + +- /* +- * We set need_find_last_extent here in case we know we were +- * processing other items and then walk into the first extent in +- * the inode. If we don't hit an extent then nothing changes, +- * we'll do the last search the next time around. +- */ +- if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) { +- has_extents = true; +- if (first_key.objectid == (u64)-1) +- first_key = ins_keys[i]; +- } else { +- need_find_last_extent = false; +- } +- + /* take a reference on file data extents so that truncates + * or deletes of this inode don't have to relog the inode + * again +@@ -4039,167 +4016,6 @@ static noinline int copy_items(struct bt + kfree(sums); + } + +- if (!has_extents) +- return ret; +- +- if (need_find_last_extent && *last_extent == first_key.offset) { +- /* +- * We don't have any leafs between our current one and the one +- * we processed before that can have file extent items for our +- * inode (and have a generation number smaller than our current +- * transaction id). +- */ +- need_find_last_extent = false; +- } +- +- /* +- * Because we use btrfs_search_forward we could skip leaves that were +- * not modified and then assume *last_extent is valid when it really +- * isn't. 
So back up to the previous leaf and read the end of the last +- * extent before we go and fill in holes. +- */ +- if (need_find_last_extent) { +- u64 len; +- +- ret = btrfs_prev_leaf(inode->root, src_path); +- if (ret < 0) +- return ret; +- if (ret) +- goto fill_holes; +- if (src_path->slots[0]) +- src_path->slots[0]--; +- src = src_path->nodes[0]; +- btrfs_item_key_to_cpu(src, &key, src_path->slots[0]); +- if (key.objectid != btrfs_ino(inode) || +- key.type != BTRFS_EXTENT_DATA_KEY) +- goto fill_holes; +- extent = btrfs_item_ptr(src, src_path->slots[0], +- struct btrfs_file_extent_item); +- if (btrfs_file_extent_type(src, extent) == +- BTRFS_FILE_EXTENT_INLINE) { +- len = btrfs_file_extent_ram_bytes(src, extent); +- *last_extent = ALIGN(key.offset + len, +- fs_info->sectorsize); +- } else { +- len = btrfs_file_extent_num_bytes(src, extent); +- *last_extent = key.offset + len; +- } +- } +-fill_holes: +- /* So we did prev_leaf, now we need to move to the next leaf, but a few +- * things could have happened +- * +- * 1) A merge could have happened, so we could currently be on a leaf +- * that holds what we were copying in the first place. +- * 2) A split could have happened, and now not all of the items we want +- * are on the same leaf. +- * +- * So we need to adjust how we search for holes, we need to drop the +- * path and re-search for the first extent key we found, and then walk +- * forward until we hit the last one we copied. +- */ +- if (need_find_last_extent) { +- /* btrfs_prev_leaf could return 1 without releasing the path */ +- btrfs_release_path(src_path); +- ret = btrfs_search_slot(NULL, inode->root, &first_key, +- src_path, 0, 0); +- if (ret < 0) +- return ret; +- ASSERT(ret == 0); +- src = src_path->nodes[0]; +- i = src_path->slots[0]; +- } else { +- i = start_slot; +- } +- +- /* +- * Ok so here we need to go through and fill in any holes we may have +- * to make sure that holes are punched for those areas in case they had +- * extents previously. +- */ +- while (!done) { +- u64 offset, len; +- u64 extent_end; +- +- if (i >= btrfs_header_nritems(src_path->nodes[0])) { +- ret = btrfs_next_leaf(inode->root, src_path); +- if (ret < 0) +- return ret; +- ASSERT(ret == 0); +- src = src_path->nodes[0]; +- i = 0; +- need_find_last_extent = true; +- } +- +- btrfs_item_key_to_cpu(src, &key, i); +- if (!btrfs_comp_cpu_keys(&key, &last_key)) +- done = true; +- if (key.objectid != btrfs_ino(inode) || +- key.type != BTRFS_EXTENT_DATA_KEY) { +- i++; +- continue; +- } +- extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item); +- if (btrfs_file_extent_type(src, extent) == +- BTRFS_FILE_EXTENT_INLINE) { +- len = btrfs_file_extent_ram_bytes(src, extent); +- extent_end = ALIGN(key.offset + len, +- fs_info->sectorsize); +- } else { +- len = btrfs_file_extent_num_bytes(src, extent); +- extent_end = key.offset + len; +- } +- i++; +- +- if (*last_extent == key.offset) { +- *last_extent = extent_end; +- continue; +- } +- offset = *last_extent; +- len = key.offset - *last_extent; +- ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode), +- offset, 0, 0, len, 0, len, 0, 0, 0); +- if (ret) +- break; +- *last_extent = extent_end; +- } +- +- /* +- * Check if there is a hole between the last extent found in our leaf +- * and the first extent in the next leaf. If there is one, we need to +- * log an explicit hole so that at replay time we can punch the hole. 
+- */ +- if (ret == 0 && +- key.objectid == btrfs_ino(inode) && +- key.type == BTRFS_EXTENT_DATA_KEY && +- i == btrfs_header_nritems(src_path->nodes[0])) { +- ret = btrfs_next_leaf(inode->root, src_path); +- need_find_last_extent = true; +- if (ret > 0) { +- ret = 0; +- } else if (ret == 0) { +- btrfs_item_key_to_cpu(src_path->nodes[0], &key, +- src_path->slots[0]); +- if (key.objectid == btrfs_ino(inode) && +- key.type == BTRFS_EXTENT_DATA_KEY && +- *last_extent < key.offset) { +- const u64 len = key.offset - *last_extent; +- +- ret = btrfs_insert_file_extent(trans, log, +- btrfs_ino(inode), +- *last_extent, 0, +- 0, len, 0, len, +- 0, 0, 0); +- *last_extent += len; +- } +- } +- } +- /* +- * Need to let the callers know we dropped the path so they should +- * re-search. +- */ +- if (!ret && need_find_last_extent) +- ret = 1; + return ret; + } + +@@ -4365,7 +4181,7 @@ static int btrfs_log_prealloc_extents(st + const u64 i_size = i_size_read(&inode->vfs_inode); + const u64 ino = btrfs_ino(inode); + struct btrfs_path *dst_path = NULL; +- u64 last_extent = (u64)-1; ++ bool dropped_extents = false; + int ins_nr = 0; + int start_slot; + int ret; +@@ -4387,8 +4203,7 @@ static int btrfs_log_prealloc_extents(st + if (slot >= btrfs_header_nritems(leaf)) { + if (ins_nr > 0) { + ret = copy_items(trans, inode, dst_path, path, +- &last_extent, start_slot, +- ins_nr, 1, 0); ++ start_slot, ins_nr, 1, 0); + if (ret < 0) + goto out; + ins_nr = 0; +@@ -4412,8 +4227,7 @@ static int btrfs_log_prealloc_extents(st + path->slots[0]++; + continue; + } +- if (last_extent == (u64)-1) { +- last_extent = key.offset; ++ if (!dropped_extents) { + /* + * Avoid logging extent items logged in past fsync calls + * and leading to duplicate keys in the log tree. +@@ -4427,6 +4241,7 @@ static int btrfs_log_prealloc_extents(st + } while (ret == -EAGAIN); + if (ret) + goto out; ++ dropped_extents = true; + } + if (ins_nr == 0) + start_slot = slot; +@@ -4441,7 +4256,7 @@ static int btrfs_log_prealloc_extents(st + } + } + if (ins_nr > 0) { +- ret = copy_items(trans, inode, dst_path, path, &last_extent, ++ ret = copy_items(trans, inode, dst_path, path, + start_slot, ins_nr, 1, 0); + if (ret > 0) + ret = 0; +@@ -4636,13 +4451,8 @@ static int btrfs_log_all_xattrs(struct b + + if (slot >= nritems) { + if (ins_nr > 0) { +- u64 last_extent = 0; +- + ret = copy_items(trans, inode, dst_path, path, +- &last_extent, start_slot, +- ins_nr, 1, 0); +- /* can't be 1, extent items aren't processed */ +- ASSERT(ret <= 0); ++ start_slot, ins_nr, 1, 0); + if (ret < 0) + return ret; + ins_nr = 0; +@@ -4666,13 +4476,8 @@ static int btrfs_log_all_xattrs(struct b + cond_resched(); + } + if (ins_nr > 0) { +- u64 last_extent = 0; +- + ret = copy_items(trans, inode, dst_path, path, +- &last_extent, start_slot, +- ins_nr, 1, 0); +- /* can't be 1, extent items aren't processed */ +- ASSERT(ret <= 0); ++ start_slot, ins_nr, 1, 0); + if (ret < 0) + return ret; + } +@@ -4681,100 +4486,119 @@ static int btrfs_log_all_xattrs(struct b + } + + /* +- * If the no holes feature is enabled we need to make sure any hole between the +- * last extent and the i_size of our inode is explicitly marked in the log. 
This +- * is to make sure that doing something like: +- * +- * 1) create file with 128Kb of data +- * 2) truncate file to 64Kb +- * 3) truncate file to 256Kb +- * 4) fsync file +- * 5) +- * 6) mount fs and trigger log replay +- * +- * Will give us a file with a size of 256Kb, the first 64Kb of data match what +- * the file had in its first 64Kb of data at step 1 and the last 192Kb of the +- * file correspond to a hole. The presence of explicit holes in a log tree is +- * what guarantees that log replay will remove/adjust file extent items in the +- * fs/subvol tree. +- * +- * Here we do not need to care about holes between extents, that is already done +- * by copy_items(). We also only need to do this in the full sync path, where we +- * lookup for extents from the fs/subvol tree only. In the fast path case, we +- * lookup the list of modified extent maps and if any represents a hole, we +- * insert a corresponding extent representing a hole in the log tree. ++ * When using the NO_HOLES feature if we punched a hole that causes the ++ * deletion of entire leafs or all the extent items of the first leaf (the one ++ * that contains the inode item and references) we may end up not processing ++ * any extents, because there are no leafs with a generation matching the ++ * current transaction that have extent items for our inode. So we need to find ++ * if any holes exist and then log them. We also need to log holes after any ++ * truncate operation that changes the inode's size. + */ +-static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, +- struct btrfs_root *root, +- struct btrfs_inode *inode, +- struct btrfs_path *path) ++static int btrfs_log_holes(struct btrfs_trans_handle *trans, ++ struct btrfs_root *root, ++ struct btrfs_inode *inode, ++ struct btrfs_path *path) + { + struct btrfs_fs_info *fs_info = root->fs_info; +- int ret; + struct btrfs_key key; +- u64 hole_start; +- u64 hole_size; +- struct extent_buffer *leaf; +- struct btrfs_root *log = root->log_root; + const u64 ino = btrfs_ino(inode); + const u64 i_size = i_size_read(&inode->vfs_inode); ++ u64 prev_extent_end = 0; ++ int ret; + +- if (!btrfs_fs_incompat(fs_info, NO_HOLES)) ++ if (!btrfs_fs_incompat(fs_info, NO_HOLES) || i_size == 0) + return 0; + + key.objectid = ino; + key.type = BTRFS_EXTENT_DATA_KEY; +- key.offset = (u64)-1; ++ key.offset = 0; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); +- ASSERT(ret != 0); + if (ret < 0) + return ret; + +- ASSERT(path->slots[0] > 0); +- path->slots[0]--; +- leaf = path->nodes[0]; +- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); +- +- if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) { +- /* inode does not have any extents */ +- hole_start = 0; +- hole_size = i_size; +- } else { ++ while (true) { + struct btrfs_file_extent_item *extent; ++ struct extent_buffer *leaf = path->nodes[0]; + u64 len; + +- /* +- * If there's an extent beyond i_size, an explicit hole was +- * already inserted by copy_items(). +- */ +- if (key.offset >= i_size) +- return 0; ++ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { ++ ret = btrfs_next_leaf(root, path); ++ if (ret < 0) ++ return ret; ++ if (ret > 0) { ++ ret = 0; ++ break; ++ } ++ leaf = path->nodes[0]; ++ } ++ ++ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); ++ if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) ++ break; ++ ++ /* We have a hole, log it. 
*/ ++ if (prev_extent_end < key.offset) { ++ const u64 hole_len = key.offset - prev_extent_end; ++ ++ /* ++ * Release the path to avoid deadlocks with other code ++ * paths that search the root while holding locks on ++ * leafs from the log root. ++ */ ++ btrfs_release_path(path); ++ ret = btrfs_insert_file_extent(trans, root->log_root, ++ ino, prev_extent_end, 0, ++ 0, hole_len, 0, hole_len, ++ 0, 0, 0); ++ if (ret < 0) ++ return ret; ++ ++ /* ++ * Search for the same key again in the root. Since it's ++ * an extent item and we are holding the inode lock, the ++ * key must still exist. If it doesn't just emit warning ++ * and return an error to fall back to a transaction ++ * commit. ++ */ ++ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); ++ if (ret < 0) ++ return ret; ++ if (WARN_ON(ret > 0)) ++ return -ENOENT; ++ leaf = path->nodes[0]; ++ } + + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); +- + if (btrfs_file_extent_type(leaf, extent) == +- BTRFS_FILE_EXTENT_INLINE) +- return 0; ++ BTRFS_FILE_EXTENT_INLINE) { ++ len = btrfs_file_extent_ram_bytes(leaf, extent); ++ prev_extent_end = ALIGN(key.offset + len, ++ fs_info->sectorsize); ++ } else { ++ len = btrfs_file_extent_num_bytes(leaf, extent); ++ prev_extent_end = key.offset + len; ++ } + +- len = btrfs_file_extent_num_bytes(leaf, extent); +- /* Last extent goes beyond i_size, no need to log a hole. */ +- if (key.offset + len > i_size) +- return 0; +- hole_start = key.offset + len; +- hole_size = i_size - hole_start; ++ path->slots[0]++; ++ cond_resched(); + } +- btrfs_release_path(path); + +- /* Last extent ends at i_size. */ +- if (hole_size == 0) +- return 0; ++ if (prev_extent_end < i_size) { ++ u64 hole_len; + +- hole_size = ALIGN(hole_size, fs_info->sectorsize); +- ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0, +- hole_size, 0, hole_size, 0, 0, 0); +- return ret; ++ btrfs_release_path(path); ++ hole_len = ALIGN(i_size - prev_extent_end, fs_info->sectorsize); ++ ret = btrfs_insert_file_extent(trans, root->log_root, ++ ino, prev_extent_end, 0, 0, ++ hole_len, 0, hole_len, ++ 0, 0, 0); ++ if (ret < 0) ++ return ret; ++ } ++ ++ return 0; + } + + /* +@@ -4934,7 +4758,6 @@ static int btrfs_log_inode(struct btrfs_ + struct btrfs_key min_key; + struct btrfs_key max_key; + struct btrfs_root *log = root->log_root; +- u64 last_extent = 0; + int err = 0; + int ret; + int nritems; +@@ -5108,7 +4931,7 @@ again: + ins_start_slot = path->slots[0]; + } + ret = copy_items(trans, inode, dst_path, path, +- &last_extent, ins_start_slot, ++ ins_start_slot, + ins_nr, inode_only, + logged_isize); + if (ret < 0) { +@@ -5161,17 +4984,13 @@ again: + if (ins_nr == 0) + goto next_slot; + ret = copy_items(trans, inode, dst_path, path, +- &last_extent, ins_start_slot, ++ ins_start_slot, + ins_nr, inode_only, logged_isize); + if (ret < 0) { + err = ret; + goto out_unlock; + } + ins_nr = 0; +- if (ret) { +- btrfs_release_path(path); +- continue; +- } + goto next_slot; + } + +@@ -5184,18 +5003,13 @@ again: + goto next_slot; + } + +- ret = copy_items(trans, inode, dst_path, path, &last_extent, ++ ret = copy_items(trans, inode, dst_path, path, + ins_start_slot, ins_nr, inode_only, + logged_isize); + if (ret < 0) { + err = ret; + goto out_unlock; + } +- if (ret) { +- ins_nr = 0; +- btrfs_release_path(path); +- continue; +- } + ins_nr = 1; + ins_start_slot = path->slots[0]; + next_slot: +@@ -5209,13 +5023,12 @@ next_slot: + } + if (ins_nr) { + ret = copy_items(trans, inode, dst_path, path, +- &last_extent, 
ins_start_slot, ++ ins_start_slot, + ins_nr, inode_only, logged_isize); + if (ret < 0) { + err = ret; + goto out_unlock; + } +- ret = 0; + ins_nr = 0; + } + btrfs_release_path(path); +@@ -5230,14 +5043,13 @@ next_key: + } + } + if (ins_nr) { +- ret = copy_items(trans, inode, dst_path, path, &last_extent, ++ ret = copy_items(trans, inode, dst_path, path, + ins_start_slot, ins_nr, inode_only, + logged_isize); + if (ret < 0) { + err = ret; + goto out_unlock; + } +- ret = 0; + ins_nr = 0; + } + +@@ -5250,7 +5062,7 @@ next_key: + if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { + btrfs_release_path(path); + btrfs_release_path(dst_path); +- err = btrfs_log_trailing_hole(trans, root, inode, path); ++ err = btrfs_log_holes(trans, root, inode, path); + if (err) + goto out_unlock; + } diff --git a/queue-4.19/btrfs-fix-race-between-adding-and-putting-tree-mod-seq-elements-and-nodes.patch b/queue-4.19/btrfs-fix-race-between-adding-and-putting-tree-mod-seq-elements-and-nodes.patch new file mode 100644 index 00000000000..0dbdfedf960 --- /dev/null +++ b/queue-4.19/btrfs-fix-race-between-adding-and-putting-tree-mod-seq-elements-and-nodes.patch @@ -0,0 +1,237 @@ +From 7227ff4de55d931bbdc156c8ef0ce4f100c78a5b Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Wed, 22 Jan 2020 12:23:20 +0000 +Subject: Btrfs: fix race between adding and putting tree mod seq elements and nodes + +From: Filipe Manana + +commit 7227ff4de55d931bbdc156c8ef0ce4f100c78a5b upstream. + +There is a race between adding and removing elements to the tree mod log +list and rbtree that can lead to use-after-free problems. + +Consider the following example that explains how/why the problem happens: + +1) Task A has mod log element with sequence number 200. It currently is + the only element in the mod log list; + +2) Task A calls btrfs_put_tree_mod_seq() because it no longer needs to + access the tree mod log. When it enters the function, it initializes + 'min_seq' to (u64)-1. Then it acquires the lock 'tree_mod_seq_lock' + before checking if there are other elements in the mod seq list. + Since the list is empty, 'min_seq' remains set to (u64)-1. Then it + unlocks the lock 'tree_mod_seq_lock'; + +3) Before task A acquires the lock 'tree_mod_log_lock', task B adds + itself to the mod seq list through btrfs_get_tree_mod_seq() and gets a + sequence number of 201; + +4) Some other task, call it task C, modifies a btree and because there + are elements in the mod seq list, it adds a tree mod elem to the tree + mod log rbtree. That node added to the mod log rbtree is assigned + a sequence number of 202; + +5) Task B, which is doing fiemap and resolving indirect back references, + calls btrfs_get_old_root(), with 'time_seq' == 201, which in turn + calls tree_mod_log_search() - the search returns the mod log node + from the rbtree with sequence number 202, created by task C; + +6) Task A now acquires the lock 'tree_mod_log_lock', starts iterating + the mod log rbtree and finds the node with sequence number 202. Since + 202 is less than the previously computed 'min_seq', (u64)-1, it + removes the node and frees it; + +7) Task B still has a pointer to the node with sequence number 202, and + it dereferences the pointer, both directly and through the call to + __tree_mod_log_rewind(), resulting in a use-after-free problem. + +This issue can be triggered sporadically with the test case generic/561 +from fstests, and it happens more frequently with a higher number of +duperemove processes.
When it happens to me, it either freezes the VM or +it produces a trace like the following before crashing: + + [ 1245.321140] general protection fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI + [ 1245.321200] CPU: 1 PID: 26997 Comm: pool Not tainted 5.5.0-rc6-btrfs-next-52 #1 + [ 1245.321235] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014 + [ 1245.321287] RIP: 0010:rb_next+0x16/0x50 + [ 1245.321307] Code: .... + [ 1245.321372] RSP: 0018:ffffa151c4d039b0 EFLAGS: 00010202 + [ 1245.321388] RAX: 6b6b6b6b6b6b6b6b RBX: ffff8ae221363c80 RCX: 6b6b6b6b6b6b6b6b + [ 1245.321409] RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff8ae221363c80 + [ 1245.321439] RBP: ffff8ae20fcc4688 R08: 0000000000000002 R09: 0000000000000000 + [ 1245.321475] R10: ffff8ae20b120910 R11: 00000000243f8bb1 R12: 0000000000000038 + [ 1245.321506] R13: ffff8ae221363c80 R14: 000000000000075f R15: ffff8ae223f762b8 + [ 1245.321539] FS: 00007fdee1ec7700(0000) GS:ffff8ae236c80000(0000) knlGS:0000000000000000 + [ 1245.321591] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + [ 1245.321614] CR2: 00007fded4030c48 CR3: 000000021da16003 CR4: 00000000003606e0 + [ 1245.321642] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + [ 1245.321668] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + [ 1245.321706] Call Trace: + [ 1245.321798] __tree_mod_log_rewind+0xbf/0x280 [btrfs] + [ 1245.321841] btrfs_search_old_slot+0x105/0xd00 [btrfs] + [ 1245.321877] resolve_indirect_refs+0x1eb/0xc60 [btrfs] + [ 1245.321912] find_parent_nodes+0x3dc/0x11b0 [btrfs] + [ 1245.321947] btrfs_check_shared+0x115/0x1c0 [btrfs] + [ 1245.321980] ? extent_fiemap+0x59d/0x6d0 [btrfs] + [ 1245.322029] extent_fiemap+0x59d/0x6d0 [btrfs] + [ 1245.322066] do_vfs_ioctl+0x45a/0x750 + [ 1245.322081] ksys_ioctl+0x70/0x80 + [ 1245.322092] ? trace_hardirqs_off_thunk+0x1a/0x1c + [ 1245.322113] __x64_sys_ioctl+0x16/0x20 + [ 1245.322126] do_syscall_64+0x5c/0x280 + [ 1245.322139] entry_SYSCALL_64_after_hwframe+0x49/0xbe + [ 1245.322155] RIP: 0033:0x7fdee3942dd7 + [ 1245.322177] Code: .... + [ 1245.322258] RSP: 002b:00007fdee1ec6c88 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 + [ 1245.322294] RAX: ffffffffffffffda RBX: 00007fded40210d8 RCX: 00007fdee3942dd7 + [ 1245.322314] RDX: 00007fded40210d8 RSI: 00000000c020660b RDI: 0000000000000004 + [ 1245.322337] RBP: 0000562aa89e7510 R08: 0000000000000000 R09: 00007fdee1ec6d44 + [ 1245.322369] R10: 0000000000000073 R11: 0000000000000246 R12: 00007fdee1ec6d48 + [ 1245.322390] R13: 00007fdee1ec6d40 R14: 00007fded40210d0 R15: 00007fdee1ec6d50 + [ 1245.322423] Modules linked in: .... + [ 1245.323443] ---[ end trace 01de1e9ec5dff3cd ]--- + +Fix this by ensuring that btrfs_put_tree_mod_seq() computes the minimum +sequence number and iterates the rbtree while holding the lock +'tree_mod_log_lock' in write mode. Also get rid of the 'tree_mod_seq_lock' +lock, since it is now redundant. 
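(Condensed to its locking essence, the fixed pattern looks like the sketch below: a pthread-based stand-in, not the btrfs code itself. The point is that computing the minimum live sequence number and pruning older log nodes happen under one and the same write lock, so no reader can register in between and look up a node that is about to be freed.)

	#include <pthread.h>
	#include <stdint.h>
	#include <stdlib.h>

	struct seq_elem { uint64_t seq; struct seq_elem *next; };
	struct log_node { uint64_t seq; struct log_node *next; };

	static pthread_rwlock_t log_lock = PTHREAD_RWLOCK_INITIALIZER;
	static struct seq_elem *seq_list;	/* registered readers ("blockers") */
	static struct log_node *mod_log;	/* stand-in for the mod log rbtree */

	void put_tree_mod_seq(struct seq_elem *me)
	{
		uint64_t min_seq = UINT64_MAX;

		pthread_rwlock_wrlock(&log_lock);
		/* unlink ourselves from the list of readers */
		for (struct seq_elem **pp = &seq_list; *pp; pp = &(*pp)->next)
			if (*pp == me) { *pp = me->next; break; }
		/* minimum sequence number still in use by a reader */
		for (struct seq_elem *e = seq_list; e; e = e->next)
			if (e->seq < min_seq)
				min_seq = e->seq;
		/* prune log nodes no reader can still reach, same lock held */
		for (struct log_node **pp = &mod_log; *pp; ) {
			if ((*pp)->seq < min_seq) {
				struct log_node *dead = *pp;
				*pp = dead->next;
				free(dead);
			} else {
				pp = &(*pp)->next;
			}
		}
		pthread_rwlock_unlock(&log_lock);
	}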
+ +Fixes: bd989ba359f2ac ("Btrfs: add tree modification log functions") +Fixes: 097b8a7c9e48e2 ("Btrfs: join tree mod log code with the code holding back delayed refs") +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Josef Bacik +Reviewed-by: Nikolay Borisov +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/ctree.c | 8 ++------ + fs/btrfs/ctree.h | 6 ++---- + fs/btrfs/delayed-ref.c | 8 ++++---- + fs/btrfs/disk-io.c | 1 - + fs/btrfs/tests/btrfs-tests.c | 1 - + 5 files changed, 8 insertions(+), 16 deletions(-) + +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -337,12 +337,10 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_ + struct seq_list *elem) + { + write_lock(&fs_info->tree_mod_log_lock); +- spin_lock(&fs_info->tree_mod_seq_lock); + if (!elem->seq) { + elem->seq = btrfs_inc_tree_mod_seq(fs_info); + list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); + } +- spin_unlock(&fs_info->tree_mod_seq_lock); + write_unlock(&fs_info->tree_mod_log_lock); + + return elem->seq; +@@ -362,7 +360,7 @@ void btrfs_put_tree_mod_seq(struct btrfs + if (!seq_putting) + return; + +- spin_lock(&fs_info->tree_mod_seq_lock); ++ write_lock(&fs_info->tree_mod_log_lock); + list_del(&elem->list); + elem->seq = 0; + +@@ -373,19 +371,17 @@ void btrfs_put_tree_mod_seq(struct btrfs + * blocker with lower sequence number exists, we + * cannot remove anything from the log + */ +- spin_unlock(&fs_info->tree_mod_seq_lock); ++ write_unlock(&fs_info->tree_mod_log_lock); + return; + } + min_seq = cur_elem->seq; + } + } +- spin_unlock(&fs_info->tree_mod_seq_lock); + + /* + * anything that's lower than the lowest existing (read: blocked) + * sequence number can be removed from the tree. + */ +- write_lock(&fs_info->tree_mod_log_lock); + tm_root = &fs_info->tree_mod_log; + for (node = rb_first(tm_root); node; node = next) { + next = rb_next(node); +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -893,14 +893,12 @@ struct btrfs_fs_info { + struct list_head delayed_iputs; + struct mutex cleaner_delayed_iput_mutex; + +- /* this protects tree_mod_seq_list */ +- spinlock_t tree_mod_seq_lock; + atomic64_t tree_mod_seq; +- struct list_head tree_mod_seq_list; + +- /* this protects tree_mod_log */ ++ /* this protects tree_mod_log and tree_mod_seq_list */ + rwlock_t tree_mod_log_lock; + struct rb_root tree_mod_log; ++ struct list_head tree_mod_seq_list; + + atomic_t async_delalloc_pages; + +--- a/fs/btrfs/delayed-ref.c ++++ b/fs/btrfs/delayed-ref.c +@@ -301,7 +301,7 @@ void btrfs_merge_delayed_refs(struct btr + if (head->is_data) + return; + +- spin_lock(&fs_info->tree_mod_seq_lock); ++ read_lock(&fs_info->tree_mod_log_lock); + if (!list_empty(&fs_info->tree_mod_seq_list)) { + struct seq_list *elem; + +@@ -309,7 +309,7 @@ void btrfs_merge_delayed_refs(struct btr + struct seq_list, list); + seq = elem->seq; + } +- spin_unlock(&fs_info->tree_mod_seq_lock); ++ read_unlock(&fs_info->tree_mod_log_lock); + + again: + for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) { +@@ -326,7 +326,7 @@ int btrfs_check_delayed_seq(struct btrfs + struct seq_list *elem; + int ret = 0; + +- spin_lock(&fs_info->tree_mod_seq_lock); ++ read_lock(&fs_info->tree_mod_log_lock); + if (!list_empty(&fs_info->tree_mod_seq_list)) { + elem = list_first_entry(&fs_info->tree_mod_seq_list, + struct seq_list, list); +@@ -339,7 +339,7 @@ int btrfs_check_delayed_seq(struct btrfs + } + } + +- spin_unlock(&fs_info->tree_mod_seq_lock); ++ read_unlock(&fs_info->tree_mod_log_lock); + return ret; + } + 
+--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -2645,7 +2645,6 @@ int open_ctree(struct super_block *sb, + spin_lock_init(&fs_info->fs_roots_radix_lock); + spin_lock_init(&fs_info->delayed_iput_lock); + spin_lock_init(&fs_info->defrag_inodes_lock); +- spin_lock_init(&fs_info->tree_mod_seq_lock); + spin_lock_init(&fs_info->super_lock); + spin_lock_init(&fs_info->qgroup_op_lock); + spin_lock_init(&fs_info->buffer_lock); +--- a/fs/btrfs/tests/btrfs-tests.c ++++ b/fs/btrfs/tests/btrfs-tests.c +@@ -102,7 +102,6 @@ struct btrfs_fs_info *btrfs_alloc_dummy_ + spin_lock_init(&fs_info->qgroup_op_lock); + spin_lock_init(&fs_info->super_lock); + spin_lock_init(&fs_info->fs_roots_radix_lock); +- spin_lock_init(&fs_info->tree_mod_seq_lock); + mutex_init(&fs_info->qgroup_ioctl_lock); + mutex_init(&fs_info->qgroup_rescan_lock); + rwlock_init(&fs_info->tree_mod_log_lock); diff --git a/queue-4.19/btrfs-set-trans-drity-in-btrfs_commit_transaction.patch b/queue-4.19/btrfs-set-trans-drity-in-btrfs_commit_transaction.patch new file mode 100644 index 00000000000..a3e12017148 --- /dev/null +++ b/queue-4.19/btrfs-set-trans-drity-in-btrfs_commit_transaction.patch @@ -0,0 +1,96 @@ +From d62b23c94952e78211a383b7d90ef0afbd9a3717 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Fri, 17 Jan 2020 08:57:51 -0500 +Subject: btrfs: set trans->drity in btrfs_commit_transaction + +From: Josef Bacik + +commit d62b23c94952e78211a383b7d90ef0afbd9a3717 upstream. + +If we abort a transaction we have the following sequence + +if (!trans->dirty && list_empty(&trans->new_bgs)) + return; +WRITE_ONCE(trans->transaction->aborted, err); + +The idea being if we didn't modify anything with our trans handle then +we don't really need to abort the whole transaction, maybe the other +trans handles are fine and we can carry on. + +However in the case of create_snapshot we add a pending_snapshot object +to our transaction and then commit the transaction. We don't actually +modify anything. sync() behaves the same way, attach to an existing +transaction and commit it. This means that if we have an IO error in +the right places we could abort the committing transaction with our +trans->dirty being not set and thus not set transaction->aborted. + +This is a problem because in the create_snapshot() case we depend on +pending->error being set to something, or btrfs_commit_transaction +returning an error. + +If we are not the trans handle that gets to commit the transaction, and +we're waiting on the commit to happen we get our return value from +cur_trans->aborted. If this was not set to anything because sync() hit +an error in the transaction commit before it could modify anything then +cur_trans->aborted would be 0. Thus we'd return 0 from +btrfs_commit_transaction() in create_snapshot. + +This is a problem because we then try to do things with +pending_snapshot->snap, which will be NULL because we didn't create the +snapshot, and then we'll get a NULL pointer dereference like the +following + +"BUG: kernel NULL pointer dereference, address: 00000000000001f0" +RIP: 0010:btrfs_orphan_cleanup+0x2d/0x330 +Call Trace: + ? btrfs_mksubvol.isra.31+0x3f2/0x510 + btrfs_mksubvol.isra.31+0x4bc/0x510 + ? __sb_start_write+0xfa/0x200 + ? mnt_want_write_file+0x24/0x50 + btrfs_ioctl_snap_create_transid+0x16c/0x1a0 + btrfs_ioctl_snap_create_v2+0x11e/0x1a0 + btrfs_ioctl+0x1534/0x2c10 + ? free_debug_processing+0x262/0x2a3 + do_vfs_ioctl+0xa6/0x6b0 + ? do_sys_open+0x188/0x220 + ? 
syscall_trace_enter+0x1f8/0x330 + ksys_ioctl+0x60/0x90 + __x64_sys_ioctl+0x16/0x20 + do_syscall_64+0x4a/0x1b0 + +In order to fix this we need to make sure anybody who calls +commit_transaction has trans->dirty set so that they properly set the +trans->transaction->aborted value properly so any waiters know bad +things happened. + +This was found while I was running generic/475 with my modified +fsstress, it reproduced within a few runs. I ran with this patch all +night and didn't see the problem again. + +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/transaction.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/fs/btrfs/transaction.c ++++ b/fs/btrfs/transaction.c +@@ -1936,6 +1936,14 @@ int btrfs_commit_transaction(struct btrf + struct btrfs_transaction *prev_trans = NULL; + int ret; + ++ /* ++ * Some places just start a transaction to commit it. We need to make ++ * sure that if this commit fails that the abort code actually marks the ++ * transaction as failed, so set trans->dirty to make the abort code do ++ * the right thing. ++ */ ++ trans->dirty = true; ++ + /* Stop the commit early if ->aborted is set */ + if (unlikely(READ_ONCE(cur_trans->aborted))) { + ret = cur_trans->aborted; diff --git a/queue-4.19/drm-rect-avoid-division-by-zero.patch b/queue-4.19/drm-rect-avoid-division-by-zero.patch new file mode 100644 index 00000000000..012d065365b --- /dev/null +++ b/queue-4.19/drm-rect-avoid-division-by-zero.patch @@ -0,0 +1,47 @@ +From 433480c1afd44f3e1e664b85063d98cefeefa0ed Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= +Date: Fri, 22 Nov 2019 19:56:20 +0200 +Subject: drm/rect: Avoid division by zero +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ville Syrjälä + +commit 433480c1afd44f3e1e664b85063d98cefeefa0ed upstream. + +Check for zero width/height destination rectangle in +drm_rect_clip_scaled() to avoid a division by zero. 
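(The shape of the fix as a standalone sketch, simplified from drm_rect.c: plain 64-bit arithmetic replaces mul_u32_u32() and the kernel's round-up division helper.)

	#include <stdint.h>

	/* Scale a clipped destination length back into source space: src is
	 * the source length, dst the destination length, clip the amount
	 * clipped away. dst == 0 must be handled first, since dst - clip
	 * would wrap around and the division below would fault. Returning 0
	 * means the rectangle is fully clipped. */
	static uint32_t clip_scaled(uint32_t src, uint32_t dst, uint32_t clip)
	{
		uint64_t tmp;

		if (dst == 0)
			return 0;

		tmp = (uint64_t)src * (dst - clip);
		/* round toward 1.0 when clipping, as in the original helper */
		return (uint32_t)((tmp + dst - 1) / dst);
	}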
+ +Cc: stable@vger.kernel.org +Fixes: f96bdf564f3e ("drm/rect: Handle rounding errors in drm_rect_clip_scaled, v3.") +Cc: Maarten Lankhorst +Cc: Benjamin Gaignard +Cc: Daniel Vetter +Testcase: igt/kms_selftest/drm_rect_clip_scaled_div_by_zero +Signed-off-by: Ville Syrjälä +Link: https://patchwork.freedesktop.org/patch/msgid/20191122175623.13565-2-ville.syrjala@linux.intel.com +Reviewed-by: Daniel Vetter +Reviewed-by: Benjamin Gaignard +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/drm_rect.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/drm_rect.c ++++ b/drivers/gpu/drm/drm_rect.c +@@ -52,7 +52,12 @@ EXPORT_SYMBOL(drm_rect_intersect); + + static u32 clip_scaled(u32 src, u32 dst, u32 clip) + { +- u64 tmp = mul_u32_u32(src, dst - clip); ++ u64 tmp; ++ ++ if (dst == 0) ++ return 0; ++ ++ tmp = mul_u32_u32(src, dst - clip); + + /* + * Round toward 1.0 when clipping so that we don't accidentally diff --git a/queue-4.19/eventfd-track-eventfd_signal-recursion-depth.patch b/queue-4.19/eventfd-track-eventfd_signal-recursion-depth.patch new file mode 100644 index 00000000000..2431e3f7312 --- /dev/null +++ b/queue-4.19/eventfd-track-eventfd_signal-recursion-depth.patch @@ -0,0 +1,102 @@ +From b5e683d5cab8cd433b06ae178621f083cabd4f63 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Sun, 2 Feb 2020 08:23:03 -0700 +Subject: eventfd: track eventfd_signal() recursion depth + +From: Jens Axboe + +commit b5e683d5cab8cd433b06ae178621f083cabd4f63 upstream. + +eventfd use cases from aio and io_uring can deadlock due to circular +or recursive calling, when eventfd_signal() tries to grab the waitqueue +lock. On top of that, it's also possible to construct notification +chains that are deep enough that we could blow the stack. + +Add a percpu counter that tracks the percpu recursion depth, warn if we +exceed it. The counter is also exposed so that users of eventfd_signal() +can do the right thing if it's non-zero in the context where it is +called. + +Cc: stable@vger.kernel.org # 4.19+ +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + fs/eventfd.c | 15 +++++++++++++++ + include/linux/eventfd.h | 14 ++++++++++++++ + 2 files changed, 29 insertions(+) + +--- a/fs/eventfd.c ++++ b/fs/eventfd.c +@@ -22,6 +22,8 @@ + #include + #include + ++DEFINE_PER_CPU(int, eventfd_wake_count); ++ + struct eventfd_ctx { + struct kref kref; + wait_queue_head_t wqh; +@@ -55,12 +57,25 @@ __u64 eventfd_signal(struct eventfd_ctx + { + unsigned long flags; + ++ /* ++ * Deadlock or stack overflow issues can happen if we recurse here ++ * through waitqueue wakeup handlers. If the caller users potentially ++ * nested waitqueues with custom wakeup handlers, then it should ++ * check eventfd_signal_count() before calling this function. If ++ * it returns true, the eventfd_signal() call should be deferred to a ++ * safe context.
++ */ ++ if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count))) ++ return 0; ++ + spin_lock_irqsave(&ctx->wqh.lock, flags); ++ this_cpu_inc(eventfd_wake_count); + if (ULLONG_MAX - ctx->count < n) + n = ULLONG_MAX - ctx->count; + ctx->count += n; + if (waitqueue_active(&ctx->wqh)) + wake_up_locked_poll(&ctx->wqh, EPOLLIN); ++ this_cpu_dec(eventfd_wake_count); + spin_unlock_irqrestore(&ctx->wqh.lock, flags); + + return n; +--- a/include/linux/eventfd.h ++++ b/include/linux/eventfd.h +@@ -12,6 +12,8 @@ + #include + #include + #include ++#include ++#include + + /* + * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining +@@ -40,6 +42,13 @@ __u64 eventfd_signal(struct eventfd_ctx + int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, + __u64 *cnt); + ++DECLARE_PER_CPU(int, eventfd_wake_count); ++ ++static inline bool eventfd_signal_count(void) ++{ ++ return this_cpu_read(eventfd_wake_count); ++} ++ + #else /* CONFIG_EVENTFD */ + + /* +@@ -68,6 +77,11 @@ static inline int eventfd_ctx_remove_wai + return -ENOSYS; + } + ++static inline bool eventfd_signal_count(void) ++{ ++ return false; ++} ++ + #endif + + #endif /* _LINUX_EVENTFD_H */ diff --git a/queue-4.19/gfs2-fix-o_sync-write-handling.patch b/queue-4.19/gfs2-fix-o_sync-write-handling.patch new file mode 100644 index 00000000000..752e0ee8626 --- /dev/null +++ b/queue-4.19/gfs2-fix-o_sync-write-handling.patch @@ -0,0 +1,111 @@ +From 6e5e41e2dc4e4413296d5a4af54ac92d7cd52317 Mon Sep 17 00:00:00 2001 +From: Andreas Gruenbacher +Date: Tue, 14 Jan 2020 17:12:18 +0100 +Subject: gfs2: fix O_SYNC write handling + +From: Andreas Gruenbacher + +commit 6e5e41e2dc4e4413296d5a4af54ac92d7cd52317 upstream. + +In gfs2_file_write_iter, for direct writes, the error checking in the buffered +write fallback case is incomplete. This can cause inode write errors to go +undetected. Fix and clean up gfs2_file_write_iter along the way. + +Based on a proposed fix by Christoph Hellwig . + +Fixes: 967bcc91b044 ("gfs2: iomap direct I/O support") +Cc: stable@vger.kernel.org # v4.19+ +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Greg Kroah-Hartman + +--- + fs/gfs2/file.c | 51 +++++++++++++++++++++------------------------------ + 1 file changed, 21 insertions(+), 30 deletions(-) + +--- a/fs/gfs2/file.c ++++ b/fs/gfs2/file.c +@@ -780,7 +780,7 @@ static ssize_t gfs2_file_write_iter(stru + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); + struct gfs2_inode *ip = GFS2_I(inode); +- ssize_t written = 0, ret; ++ ssize_t ret; + + ret = gfs2_rsqa_alloc(ip); + if (ret) +@@ -812,55 +812,46 @@ static ssize_t gfs2_file_write_iter(stru + + if (iocb->ki_flags & IOCB_DIRECT) { + struct address_space *mapping = file->f_mapping; +- loff_t pos, endbyte; +- ssize_t buffered; ++ ssize_t buffered, ret2; + +- written = gfs2_file_direct_write(iocb, from); +- if (written < 0 || !iov_iter_count(from)) ++ ret = gfs2_file_direct_write(iocb, from); ++ if (ret < 0 || !iov_iter_count(from)) + goto out_unlock; + ++ iocb->ki_flags |= IOCB_DSYNC; + current->backing_dev_info = inode_to_bdi(inode); +- ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); ++ buffered = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); + current->backing_dev_info = NULL; +- if (unlikely(ret < 0)) ++ if (unlikely(buffered <= 0)) + goto out_unlock; +- buffered = ret; + + /* + * We need to ensure that the page cache pages are written to + * disk and invalidated to preserve the expected O_DIRECT +- * semantics. ++ * semantics. 
If the writeback or invalidate fails, only report ++ * the direct I/O range as we don't know if the buffered pages ++ * made it to disk. + */ +- pos = iocb->ki_pos; +- endbyte = pos + buffered - 1; +- ret = filemap_write_and_wait_range(mapping, pos, endbyte); +- if (!ret) { +- iocb->ki_pos += buffered; +- written += buffered; +- invalidate_mapping_pages(mapping, +- pos >> PAGE_SHIFT, +- endbyte >> PAGE_SHIFT); +- } else { +- /* +- * We don't know how much we wrote, so just return +- * the number of bytes which were direct-written +- */ +- } ++ iocb->ki_pos += buffered; ++ ret2 = generic_write_sync(iocb, buffered); ++ invalidate_mapping_pages(mapping, ++ (iocb->ki_pos - buffered) >> PAGE_SHIFT, ++ (iocb->ki_pos - 1) >> PAGE_SHIFT); ++ if (!ret || ret2 > 0) ++ ret += ret2; + } else { + current->backing_dev_info = inode_to_bdi(inode); + ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); + current->backing_dev_info = NULL; +- if (likely(ret > 0)) ++ if (likely(ret > 0)) { + iocb->ki_pos += ret; ++ ret = generic_write_sync(iocb, ret); ++ } + } + + out_unlock: + inode_unlock(inode); +- if (likely(ret > 0)) { +- /* Handle various SYNC-type writes */ +- ret = generic_write_sync(iocb, ret); +- } +- return written ? written : ret; ++ return ret; + } + + static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, diff --git a/queue-4.19/gfs2-move-setting-current-backing_dev_info.patch b/queue-4.19/gfs2-move-setting-current-backing_dev_info.patch new file mode 100644 index 00000000000..4b454873adf --- /dev/null +++ b/queue-4.19/gfs2-move-setting-current-backing_dev_info.patch @@ -0,0 +1,80 @@ +From 4c0e8dda608a51855225c611b5c6b442f95fbc56 Mon Sep 17 00:00:00 2001 +From: Christoph Hellwig +Date: Wed, 15 Jan 2020 16:38:29 +0100 +Subject: gfs2: move setting current->backing_dev_info + +From: Christoph Hellwig + +commit 4c0e8dda608a51855225c611b5c6b442f95fbc56 upstream. + +Set current->backing_dev_info just around the buffered write calls to +prepare for the next fix. 
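The shape of the change is easiest to see out of context. A hedged sketch of the narrowed scope, where the thread-local bdi_override and the do_write callback are hypothetical stand-ins for current->backing_dev_info and the buffered-write call: the override is set and cleared immediately around the one call that needs it, so no other exit from the surrounding function has to remember to clear it.

    /* bdi_override models current->backing_dev_info; do_write models
     * the buffered write. Setting and clearing in one tight scope means
     * every return path leaves the thread-local state clean. */
    static _Thread_local void *bdi_override;

    static long buffered_write_scoped(void *bdi, long (*do_write)(void *arg),
                                      void *arg)
    {
        long ret;

        bdi_override = bdi;
        ret = do_write(arg);
        bdi_override = NULL;
        return ret;
    }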
+ +Fixes: 967bcc91b044 ("gfs2: iomap direct I/O support") +Cc: stable@vger.kernel.org # v4.19+ +Signed-off-by: Christoph Hellwig +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Greg Kroah-Hartman + +--- + fs/gfs2/file.c | 21 ++++++++++----------- + 1 file changed, 10 insertions(+), 11 deletions(-) + +--- a/fs/gfs2/file.c ++++ b/fs/gfs2/file.c +@@ -800,18 +800,15 @@ static ssize_t gfs2_file_write_iter(stru + inode_lock(inode); + ret = generic_write_checks(iocb, from); + if (ret <= 0) +- goto out; +- +- /* We can write back this queue in page reclaim */ +- current->backing_dev_info = inode_to_bdi(inode); ++ goto out_unlock; + + ret = file_remove_privs(file); + if (ret) +- goto out2; ++ goto out_unlock; + + ret = file_update_time(file); + if (ret) +- goto out2; ++ goto out_unlock; + + if (iocb->ki_flags & IOCB_DIRECT) { + struct address_space *mapping = file->f_mapping; +@@ -820,11 +817,13 @@ static ssize_t gfs2_file_write_iter(stru + + written = gfs2_file_direct_write(iocb, from); + if (written < 0 || !iov_iter_count(from)) +- goto out2; ++ goto out_unlock; + ++ current->backing_dev_info = inode_to_bdi(inode); + ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); ++ current->backing_dev_info = NULL; + if (unlikely(ret < 0)) +- goto out2; ++ goto out_unlock; + buffered = ret; + + /* +@@ -848,14 +847,14 @@ static ssize_t gfs2_file_write_iter(stru + */ + } + } else { ++ current->backing_dev_info = inode_to_bdi(inode); + ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); ++ current->backing_dev_info = NULL; + if (likely(ret > 0)) + iocb->ki_pos += ret; + } + +-out2: +- current->backing_dev_info = NULL; +-out: ++out_unlock: + inode_unlock(inode); + if (likely(ret > 0)) { + /* Handle various SYNC-type writes */ diff --git a/queue-4.19/iwlwifi-don-t-throw-error-when-trying-to-remove-igtk.patch b/queue-4.19/iwlwifi-don-t-throw-error-when-trying-to-remove-igtk.patch new file mode 100644 index 00000000000..0bbcdf9e98b --- /dev/null +++ b/queue-4.19/iwlwifi-don-t-throw-error-when-trying-to-remove-igtk.patch @@ -0,0 +1,56 @@ +From 197288d5ba8a5289f22d3aeb4fca3824bfd9b4af Mon Sep 17 00:00:00 2001 +From: Luca Coelho +Date: Fri, 31 Jan 2020 15:45:25 +0200 +Subject: iwlwifi: don't throw error when trying to remove IGTK + +From: Luca Coelho + +commit 197288d5ba8a5289f22d3aeb4fca3824bfd9b4af upstream. + +The IGTK keys are only removed by mac80211 after it has already +removed the AP station. This causes the driver to throw an error +because mac80211 is trying to remove the IGTK when the station doesn't +exist anymore. + +The firmware is aware that the station has been removed and can deal +with it the next time we try to add an IGTK for a station, so we +shouldn't try to remove the key if the station ID is +IWL_MVM_INVALID_STA. Do this by removing the check for mvm_sta before +calling iwl_mvm_send_sta_igtk() and check return from that function +gracefully if the station ID is invalid. 
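The underlying idiom is idempotent removal: deleting state the other side has already torn down counts as success, not as an error. A self-contained sketch with hypothetical names (INVALID_ID and fw_remove_key stand in for the driver's own sentinel and firmware command):

    #define INVALID_ID 0xff              /* hypothetical sentinel */

    int fw_remove_key(int sta_id);       /* hypothetical firmware call */

    int remove_igtk_sketch(int sta_id)
    {
        /*
         * The station, and with it the key, may already be gone; the
         * firmware reached the desired end state by another path, so
         * report success instead of sending a doomed command.
         */
        if (sta_id == INVALID_ID)
            return 0;

        return fw_remove_key(sta_id);
    }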
+ +Cc: stable@vger.kernel.org # 4.12+ +Signed-off-by: Luca Coelho +Signed-off-by: Kalle Valo +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c ++++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +@@ -3045,6 +3045,10 @@ static int iwl_mvm_send_sta_igtk(struct + igtk_cmd.sta_id = cpu_to_le32(sta_id); + + if (remove_key) { ++ /* This is a valid situation for IGTK */ ++ if (sta_id == IWL_MVM_INVALID_STA) ++ return 0; ++ + igtk_cmd.ctrl_flags |= cpu_to_le32(STA_KEY_NOT_VALID); + } else { + struct ieee80211_key_seq seq; +@@ -3352,9 +3356,9 @@ int iwl_mvm_remove_sta_key(struct iwl_mv + IWL_DEBUG_WEP(mvm, "mvm remove dynamic key: idx=%d sta=%d\n", + keyconf->keyidx, sta_id); + +- if (mvm_sta && (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC || +- keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || +- keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256)) ++ if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC || ++ keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || ++ keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256) + return iwl_mvm_send_sta_igtk(mvm, keyconf, sta_id, true); + + if (!__test_and_clear_bit(keyconf->hw_key_idx, mvm->fw_key_table)) { diff --git a/queue-4.19/jbd2_seq_info_next-should-increase-position-index.patch b/queue-4.19/jbd2_seq_info_next-should-increase-position-index.patch new file mode 100644 index 00000000000..fa79fc40ede --- /dev/null +++ b/queue-4.19/jbd2_seq_info_next-should-increase-position-index.patch @@ -0,0 +1,39 @@ +From 1a8e9cf40c9a6a2e40b1e924b13ed303aeea4418 Mon Sep 17 00:00:00 2001 +From: Vasily Averin +Date: Thu, 23 Jan 2020 12:05:10 +0300 +Subject: jbd2_seq_info_next should increase position index + +From: Vasily Averin + +commit 1a8e9cf40c9a6a2e40b1e924b13ed303aeea4418 upstream. + +if seq_file .next fuction does not change position index, +read after some lseek can generate unexpected output. + +Script below generates endless output + $ q=;while read -r r;do echo "$((++q)) $r";done +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/d13805e5-695e-8ac3-b678-26ca2313629f@virtuozzo.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman + +--- + fs/jbd2/journal.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/jbd2/journal.c ++++ b/fs/jbd2/journal.c +@@ -1002,6 +1002,7 @@ static void *jbd2_seq_info_start(struct + + static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos) + { ++ (*pos)++; + return NULL; + } + diff --git a/queue-4.19/kvm-ppc-book3s-hv-uninit-vcpu-if-vcore-creation-fails.patch b/queue-4.19/kvm-ppc-book3s-hv-uninit-vcpu-if-vcore-creation-fails.patch new file mode 100644 index 00000000000..f737a36a641 --- /dev/null +++ b/queue-4.19/kvm-ppc-book3s-hv-uninit-vcpu-if-vcore-creation-fails.patch @@ -0,0 +1,44 @@ +From 1a978d9d3e72ddfa40ac60d26301b154247ee0bc Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 18 Dec 2019 13:54:46 -0800 +Subject: KVM: PPC: Book3S HV: Uninit vCPU if vcore creation fails + +From: Sean Christopherson + +commit 1a978d9d3e72ddfa40ac60d26301b154247ee0bc upstream. + +Call kvm_vcpu_uninit() if vcore creation fails to avoid leaking any +resources allocated by kvm_vcpu_init(), i.e. the vcpu->run page. 
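The bug class here is general: a late failure must unwind every step that already succeeded, in reverse order. A self-contained sketch of the pattern the fix restores, using plain malloc/free in place of the kernel allocators:

    #include <stdlib.h>

    struct vcpu_sketch {
        void *run_page;
        void *core;
    };

    /*
     * Each failure label undoes exactly the steps that had succeeded by
     * the time of the jump. Jumping to free_vcpu after run_page was
     * allocated, which is the bug being fixed, would leak run_page.
     */
    static struct vcpu_sketch *vcpu_create_sketch(void)
    {
        struct vcpu_sketch *v = malloc(sizeof(*v));

        if (!v)
            goto out;
        v->run_page = malloc(4096);
        if (!v->run_page)
            goto free_vcpu;
        v->core = malloc(64);
        if (!v->core)
            goto free_run_page;          /* not free_vcpu */
        return v;

    free_run_page:
        free(v->run_page);
    free_vcpu:
        free(v);
    out:
        return NULL;
    }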
+ +Fixes: 371fefd6f2dc4 ("KVM: PPC: Allow book3s_hv guests to use SMT processor modes") +Cc: stable@vger.kernel.org +Reviewed-by: Greg Kurz +Signed-off-by: Sean Christopherson +Acked-by: Paul Mackerras +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kvm/book3s_hv.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/arch/powerpc/kvm/book3s_hv.c ++++ b/arch/powerpc/kvm/book3s_hv.c +@@ -2065,7 +2065,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu + mutex_unlock(&kvm->lock); + + if (!vcore) +- goto free_vcpu; ++ goto uninit_vcpu; + + spin_lock(&vcore->lock); + ++vcore->num_threads; +@@ -2082,6 +2082,8 @@ static struct kvm_vcpu *kvmppc_core_vcpu + + return vcpu; + ++uninit_vcpu: ++ kvm_vcpu_uninit(vcpu); + free_vcpu: + kmem_cache_free(kvm_vcpu_cache, vcpu); + out: diff --git a/queue-4.19/kvm-ppc-book3s-pr-free-shared-page-if-mmu-initialization-fails.patch b/queue-4.19/kvm-ppc-book3s-pr-free-shared-page-if-mmu-initialization-fails.patch new file mode 100644 index 00000000000..34c842f43ad --- /dev/null +++ b/queue-4.19/kvm-ppc-book3s-pr-free-shared-page-if-mmu-initialization-fails.patch @@ -0,0 +1,41 @@ +From cb10bf9194f4d2c5d830eddca861f7ca0fecdbb4 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 18 Dec 2019 13:54:47 -0800 +Subject: KVM: PPC: Book3S PR: Free shared page if mmu initialization fails + +From: Sean Christopherson + +commit cb10bf9194f4d2c5d830eddca861f7ca0fecdbb4 upstream. + +Explicitly free the shared page if kvmppc_mmu_init() fails during +kvmppc_core_vcpu_create(), as the page is freed only in +kvmppc_core_vcpu_free(), which is not reached via kvm_vcpu_uninit(). + +Fixes: 96bc451a15329 ("KVM: PPC: Introduce shared page") +Cc: stable@vger.kernel.org +Reviewed-by: Greg Kurz +Signed-off-by: Sean Christopherson +Acked-by: Paul Mackerras +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kvm/book3s_pr.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/arch/powerpc/kvm/book3s_pr.c ++++ b/arch/powerpc/kvm/book3s_pr.c +@@ -1772,10 +1772,12 @@ static struct kvm_vcpu *kvmppc_core_vcpu + + err = kvmppc_mmu_init(vcpu); + if (err < 0) +- goto uninit_vcpu; ++ goto free_shared_page; + + return vcpu; + ++free_shared_page: ++ free_page((unsigned long)vcpu->arch.shared); + uninit_vcpu: + kvm_vcpu_uninit(vcpu); + free_shadow_vcpu: diff --git a/queue-4.19/kvm-x86-fix-potential-put_fpu-w-o-load_fpu-on-mpx-platform.patch b/queue-4.19/kvm-x86-fix-potential-put_fpu-w-o-load_fpu-on-mpx-platform.patch new file mode 100644 index 00000000000..cea097dc510 --- /dev/null +++ b/queue-4.19/kvm-x86-fix-potential-put_fpu-w-o-load_fpu-on-mpx-platform.patch @@ -0,0 +1,55 @@ +From f958bd2314d117f8c29f4821401bc1925bc2e5ef Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Mon, 9 Dec 2019 12:19:31 -0800 +Subject: KVM: x86: Fix potential put_fpu() w/o load_fpu() on MPX platform + +From: Sean Christopherson + +commit f958bd2314d117f8c29f4821401bc1925bc2e5ef upstream. + +Unlike most state managed by XSAVE, MPX is initialized to zero on INIT. +Because INITs are usually recognized in the context of a VCPU_RUN call, +kvm_vcpu_reset() puts the guest's FPU so that the FPU state is resident +in memory, zeros the MPX state, and reloads FPU state to hardware. 
But, +in the unlikely event that an INIT is recognized during +kvm_arch_vcpu_ioctl_get_mpstate() via kvm_apic_accept_events(), +kvm_vcpu_reset() will call kvm_put_guest_fpu() without a preceding +kvm_load_guest_fpu() and corrupt the guest's FPU state (and possibly +userspace's FPU state as well). + +Given that MPX is being removed from the kernel[*], fix the bug with the +simple-but-ugly approach of loading the guest's FPU during +KVM_GET_MP_STATE. + +[*] See commit f240652b6032b ("x86/mpx: Remove MPX APIs"). + +Fixes: f775b13eedee2 ("x86,kvm: move qemu/guest FPU switching out to vcpu_run") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/x86.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -8235,6 +8235,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(stru + struct kvm_mp_state *mp_state) + { + vcpu_load(vcpu); ++ if (kvm_mpx_supported()) ++ kvm_load_guest_fpu(vcpu); + + kvm_apic_accept_events(vcpu); + if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && +@@ -8243,6 +8245,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(stru + else + mp_state->mp_state = vcpu->arch.mp_state; + ++ if (kvm_mpx_supported()) ++ kvm_put_guest_fpu(vcpu); + vcpu_put(vcpu); + return 0; + } diff --git a/queue-4.19/kvm-x86-protect-dr-based-index-computations-from-spectre-v1-l1tf-attacks.patch b/queue-4.19/kvm-x86-protect-dr-based-index-computations-from-spectre-v1-l1tf-attacks.patch new file mode 100644 index 00000000000..77c4645894a --- /dev/null +++ b/queue-4.19/kvm-x86-protect-dr-based-index-computations-from-spectre-v1-l1tf-attacks.patch @@ -0,0 +1,57 @@ +From ea740059ecb37807ba47b84b33d1447435a8d868 Mon Sep 17 00:00:00 2001 +From: Marios Pomonis +Date: Wed, 11 Dec 2019 12:47:52 -0800 +Subject: KVM: x86: Protect DR-based index computations from Spectre-v1/L1TF attacks + +From: Marios Pomonis + +commit ea740059ecb37807ba47b84b33d1447435a8d868 upstream. + +This fixes a Spectre-v1/L1TF vulnerability in __kvm_set_dr() and +kvm_get_dr(). +Both kvm_get_dr() and kvm_set_dr() (a wrapper of __kvm_set_dr()) are +exported symbols so KVM should tream them conservatively from a security +perspective. + +Fixes: 020df0794f57 ("KVM: move DR register access handling into generic code") + +Signed-off-by: Nick Finco +Signed-off-by: Marios Pomonis +Reviewed-by: Andrew Honig +Cc: stable@vger.kernel.org +Reviewed-by: Jim Mattson +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/x86.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -961,9 +961,11 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu + + static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) + { ++ size_t size = ARRAY_SIZE(vcpu->arch.db); ++ + switch (dr) { + case 0 ... 3: +- vcpu->arch.db[dr] = val; ++ vcpu->arch.db[array_index_nospec(dr, size)] = val; + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) + vcpu->arch.eff_db[dr] = val; + break; +@@ -1000,9 +1002,11 @@ EXPORT_SYMBOL_GPL(kvm_set_dr); + + int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) + { ++ size_t size = ARRAY_SIZE(vcpu->arch.db); ++ + switch (dr) { + case 0 ... 
3: +- *val = vcpu->arch.db[dr]; ++ *val = vcpu->arch.db[array_index_nospec(dr, size)]; + break; + case 4: + /* fall through */ diff --git a/queue-4.19/kvm-x86-protect-ioapic_read_indirect-from-spectre-v1-l1tf-attacks.patch b/queue-4.19/kvm-x86-protect-ioapic_read_indirect-from-spectre-v1-l1tf-attacks.patch new file mode 100644 index 00000000000..9373c0d1d8e --- /dev/null +++ b/queue-4.19/kvm-x86-protect-ioapic_read_indirect-from-spectre-v1-l1tf-attacks.patch @@ -0,0 +1,58 @@ +From 8c86405f606ca8508b8d9280680166ca26723695 Mon Sep 17 00:00:00 2001 +From: Marios Pomonis +Date: Wed, 11 Dec 2019 12:47:44 -0800 +Subject: KVM: x86: Protect ioapic_read_indirect() from Spectre-v1/L1TF attacks + +From: Marios Pomonis + +commit 8c86405f606ca8508b8d9280680166ca26723695 upstream. + +This fixes a Spectre-v1/L1TF vulnerability in ioapic_read_indirect(). +This function contains index computations based on the +(attacker-controlled) IOREGSEL register. + +Fixes: a2c118bfab8b ("KVM: Fix bounds checking in ioapic indirect register reads (CVE-2013-1798)") + +Signed-off-by: Nick Finco +Signed-off-by: Marios Pomonis +Reviewed-by: Andrew Honig +Cc: stable@vger.kernel.org +Reviewed-by: Jim Mattson +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/ioapic.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +--- a/arch/x86/kvm/ioapic.c ++++ b/arch/x86/kvm/ioapic.c +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -73,13 +74,14 @@ static unsigned long ioapic_read_indirec + default: + { + u32 redir_index = (ioapic->ioregsel - 0x10) >> 1; +- u64 redir_content; ++ u64 redir_content = ~0ULL; + +- if (redir_index < IOAPIC_NUM_PINS) +- redir_content = +- ioapic->redirtbl[redir_index].bits; +- else +- redir_content = ~0ULL; ++ if (redir_index < IOAPIC_NUM_PINS) { ++ u32 index = array_index_nospec( ++ redir_index, IOAPIC_NUM_PINS); ++ ++ redir_content = ioapic->redirtbl[index].bits; ++ } + + result = (ioapic->ioregsel & 0x1) ? + (redir_content >> 32) & 0xffffffff : diff --git a/queue-4.19/kvm-x86-protect-ioapic_write_indirect-from-spectre-v1-l1tf-attacks.patch b/queue-4.19/kvm-x86-protect-ioapic_write_indirect-from-spectre-v1-l1tf-attacks.patch new file mode 100644 index 00000000000..8169f68e8b5 --- /dev/null +++ b/queue-4.19/kvm-x86-protect-ioapic_write_indirect-from-spectre-v1-l1tf-attacks.patch @@ -0,0 +1,40 @@ +From 670564559ca35b439c8d8861fc399451ddf95137 Mon Sep 17 00:00:00 2001 +From: Marios Pomonis +Date: Wed, 11 Dec 2019 12:47:45 -0800 +Subject: KVM: x86: Protect ioapic_write_indirect() from Spectre-v1/L1TF attacks + +From: Marios Pomonis + +commit 670564559ca35b439c8d8861fc399451ddf95137 upstream. + +This fixes a Spectre-v1/L1TF vulnerability in ioapic_write_indirect(). +This function contains index computations based on the +(attacker-controlled) IOREGSEL register. + +This patch depends on patch +"KVM: x86: Protect ioapic_read_indirect() from Spectre-v1/L1TF attacks". 
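Nearly every Spectre-v1 fix in this batch follows the same recipe: bounds-check, then clamp the index with array_index_nospec() before the table access. A userspace model of the idea follows; the real kernel helper may be arch-specific, and this mask derivation only illustrates why the clamp works without introducing a branch that could itself be mispredicted.

    #include <stdint.h>

    /* All-ones when index < size, zero otherwise, computed branchlessly
     * so a mispredicted bounds check cannot steer a speculative load. */
    static inline uint64_t index_mask_nospec(uint64_t index, uint64_t size)
    {
        return ~(uint64_t)((int64_t)(index | (size - 1 - index)) >> 63);
    }

    uint64_t table_read(const uint64_t *table, uint64_t index, uint64_t size)
    {
        if (index >= size)
            return 0;
        index &= index_mask_nospec(index, size);   /* clamp under speculation */
        return table[index];
    }

The mask is a data dependency rather than a control dependency: even if the CPU speculates past the bounds check, the subsequent load can only see an in-bounds index.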
+ +Fixes: 70f93dae32ac ("KVM: Use temporary variable to shorten lines.") + +Signed-off-by: Nick Finco +Signed-off-by: Marios Pomonis +Reviewed-by: Andrew Honig +Cc: stable@vger.kernel.org +Reviewed-by: Jim Mattson +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/ioapic.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/kvm/ioapic.c ++++ b/arch/x86/kvm/ioapic.c +@@ -297,6 +297,7 @@ static void ioapic_write_indirect(struct + ioapic_debug("change redir index %x val %x\n", index, val); + if (index >= IOAPIC_NUM_PINS) + return; ++ index = array_index_nospec(index, IOAPIC_NUM_PINS); + e = &ioapic->redirtbl[index]; + mask_before = e->fields.mask; + /* Preserve read-only fields */ diff --git a/queue-4.19/kvm-x86-protect-kvm_hv_msr__crash_data-from-spectre-v1-l1tf-attacks.patch b/queue-4.19/kvm-x86-protect-kvm_hv_msr__crash_data-from-spectre-v1-l1tf-attacks.patch new file mode 100644 index 00000000000..fe821664c2c --- /dev/null +++ b/queue-4.19/kvm-x86-protect-kvm_hv_msr__crash_data-from-spectre-v1-l1tf-attacks.patch @@ -0,0 +1,59 @@ +From 8618793750071d66028584a83ed0b4fa7eb4f607 Mon Sep 17 00:00:00 2001 +From: Marios Pomonis +Date: Wed, 11 Dec 2019 12:47:42 -0800 +Subject: KVM: x86: Protect kvm_hv_msr_[get|set]_crash_data() from Spectre-v1/L1TF attacks + +From: Marios Pomonis + +commit 8618793750071d66028584a83ed0b4fa7eb4f607 upstream. + +This fixes Spectre-v1/L1TF vulnerabilities in kvm_hv_msr_get_crash_data() +and kvm_hv_msr_set_crash_data(). +These functions contain index computations that use the +(attacker-controlled) MSR number. + +Fixes: e7d9513b60e8 ("kvm/x86: added hyper-v crash msrs into kvm hyperv context") + +Signed-off-by: Nick Finco +Signed-off-by: Marios Pomonis +Reviewed-by: Andrew Honig +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/hyperv.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/arch/x86/kvm/hyperv.c ++++ b/arch/x86/kvm/hyperv.c +@@ -792,11 +792,12 @@ static int kvm_hv_msr_get_crash_data(str + u32 index, u64 *pdata) + { + struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; ++ size_t size = ARRAY_SIZE(hv->hv_crash_param); + +- if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) ++ if (WARN_ON_ONCE(index >= size)) + return -EINVAL; + +- *pdata = hv->hv_crash_param[index]; ++ *pdata = hv->hv_crash_param[array_index_nospec(index, size)]; + return 0; + } + +@@ -835,11 +836,12 @@ static int kvm_hv_msr_set_crash_data(str + u32 index, u64 data) + { + struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; ++ size_t size = ARRAY_SIZE(hv->hv_crash_param); + +- if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) ++ if (WARN_ON_ONCE(index >= size)) + return -EINVAL; + +- hv->hv_crash_param[index] = data; ++ hv->hv_crash_param[array_index_nospec(index, size)] = data; + return 0; + } + diff --git a/queue-4.19/kvm-x86-protect-kvm_lapic_reg_write-from-spectre-v1-l1tf-attacks.patch b/queue-4.19/kvm-x86-protect-kvm_lapic_reg_write-from-spectre-v1-l1tf-attacks.patch new file mode 100644 index 00000000000..d328dd9ecf2 --- /dev/null +++ b/queue-4.19/kvm-x86-protect-kvm_lapic_reg_write-from-spectre-v1-l1tf-attacks.patch @@ -0,0 +1,54 @@ +From 4bf79cb089f6b1c6c632492c0271054ce52ad766 Mon Sep 17 00:00:00 2001 +From: Marios Pomonis +Date: Wed, 11 Dec 2019 12:47:46 -0800 +Subject: KVM: x86: Protect kvm_lapic_reg_write() from Spectre-v1/L1TF attacks + +From: Marios Pomonis + +commit 4bf79cb089f6b1c6c632492c0271054ce52ad766 upstream. 
+ +This fixes a Spectre-v1/L1TF vulnerability in kvm_lapic_reg_write(). +This function contains index computations based on the +(attacker-controlled) MSR number. + +Fixes: 0105d1a52640 ("KVM: x2apic interface to lapic") + +Signed-off-by: Nick Finco +Signed-off-by: Marios Pomonis +Reviewed-by: Andrew Honig +Cc: stable@vger.kernel.org +Reviewed-by: Jim Mattson +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/lapic.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +--- a/arch/x86/kvm/lapic.c ++++ b/arch/x86/kvm/lapic.c +@@ -1862,15 +1862,20 @@ int kvm_lapic_reg_write(struct kvm_lapic + case APIC_LVTTHMR: + case APIC_LVTPC: + case APIC_LVT1: +- case APIC_LVTERR: ++ case APIC_LVTERR: { + /* TODO: Check vector */ ++ size_t size; ++ u32 index; ++ + if (!kvm_apic_sw_enabled(apic)) + val |= APIC_LVT_MASKED; +- +- val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4]; ++ size = ARRAY_SIZE(apic_lvt_mask); ++ index = array_index_nospec( ++ (reg - APIC_LVTT) >> 4, size); ++ val &= apic_lvt_mask[index]; + kvm_lapic_set_reg(apic, reg, val); +- + break; ++ } + + case APIC_LVTT: + if (!kvm_apic_sw_enabled(apic)) diff --git a/queue-4.19/kvm-x86-protect-msr-based-index-computations-from-spectre-v1-l1tf-attacks-in-x86.c.patch b/queue-4.19/kvm-x86-protect-msr-based-index-computations-from-spectre-v1-l1tf-attacks-in-x86.c.patch new file mode 100644 index 00000000000..f8d2c6c3bc2 --- /dev/null +++ b/queue-4.19/kvm-x86-protect-msr-based-index-computations-from-spectre-v1-l1tf-attacks-in-x86.c.patch @@ -0,0 +1,54 @@ +From 6ec4c5eee1750d5d17951c4e1960d953376a0dda Mon Sep 17 00:00:00 2001 +From: Marios Pomonis +Date: Wed, 11 Dec 2019 12:47:49 -0800 +Subject: KVM: x86: Protect MSR-based index computations from Spectre-v1/L1TF attacks in x86.c + +From: Marios Pomonis + +commit 6ec4c5eee1750d5d17951c4e1960d953376a0dda upstream. + +This fixes a Spectre-v1/L1TF vulnerability in set_msr_mce() and +get_msr_mce(). +Both functions contain index computations based on the +(attacker-controlled) MSR number. 
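The wrinkle in this instance is that the index is derived (msr minus the first MCE control MSR), so the clamp has to use the derived bound as well. A sketch reusing index_mask_nospec() from the model above; mce_base, bank_num and the four-MSRs-per-bank layout are illustrative parameters, not the kernel's definitions:

    /* Clamp the derived offset against the derived bound, not the raw
     * MSR number. index_mask_nospec() is the helper sketched earlier. */
    uint64_t mce_bank_read(const uint64_t *banks, uint32_t msr,
                           uint32_t mce_base, uint32_t bank_num)
    {
        uint32_t nmsrs = bank_num * 4;           /* CTL/STATUS/ADDR/MISC */

        if (msr < mce_base || msr >= mce_base + nmsrs)
            return 0;

        uint32_t off = msr - mce_base;
        off = (uint32_t)(off & index_mask_nospec(off, nmsrs));
        return banks[off];
    }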
+ +Fixes: 890ca9aefa78 ("KVM: Add MCE support") + +Signed-off-by: Nick Finco +Signed-off-by: Marios Pomonis +Reviewed-by: Andrew Honig +Cc: stable@vger.kernel.org +Reviewed-by: Jim Mattson +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/x86.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -2273,7 +2273,10 @@ static int set_msr_mce(struct kvm_vcpu * + default: + if (msr >= MSR_IA32_MC0_CTL && + msr < MSR_IA32_MCx_CTL(bank_num)) { +- u32 offset = msr - MSR_IA32_MC0_CTL; ++ u32 offset = array_index_nospec( ++ msr - MSR_IA32_MC0_CTL, ++ MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL); ++ + /* only 0 or all 1s can be written to IA32_MCi_CTL + * some Linux kernels though clear bit 10 in bank 4 to + * workaround a BIOS/GART TBL issue on AMD K8s, ignore +@@ -2685,7 +2688,10 @@ static int get_msr_mce(struct kvm_vcpu * + default: + if (msr >= MSR_IA32_MC0_CTL && + msr < MSR_IA32_MCx_CTL(bank_num)) { +- u32 offset = msr - MSR_IA32_MC0_CTL; ++ u32 offset = array_index_nospec( ++ msr - MSR_IA32_MC0_CTL, ++ MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL); ++ + data = vcpu->arch.mce_banks[offset]; + break; + } diff --git a/queue-4.19/kvm-x86-protect-msr-based-index-computations-in-fixed_msr_to_seg_unit-from-spectre-v1-l1tf-attacks.patch b/queue-4.19/kvm-x86-protect-msr-based-index-computations-in-fixed_msr_to_seg_unit-from-spectre-v1-l1tf-attacks.patch new file mode 100644 index 00000000000..c756368070b --- /dev/null +++ b/queue-4.19/kvm-x86-protect-msr-based-index-computations-in-fixed_msr_to_seg_unit-from-spectre-v1-l1tf-attacks.patch @@ -0,0 +1,47 @@ +From 25a5edea71b7c154b6a0b8cec14c711cafa31d26 Mon Sep 17 00:00:00 2001 +From: Marios Pomonis +Date: Wed, 11 Dec 2019 12:47:47 -0800 +Subject: KVM: x86: Protect MSR-based index computations in fixed_msr_to_seg_unit() from Spectre-v1/L1TF attacks + +From: Marios Pomonis + +commit 25a5edea71b7c154b6a0b8cec14c711cafa31d26 upstream. + +This fixes a Spectre-v1/L1TF vulnerability in fixed_msr_to_seg_unit(). +This function contains index computations based on the +(attacker-controlled) MSR number. + +Fixes: de9aef5e1ad6 ("KVM: MTRR: introduce fixed_mtrr_segment table") + +Signed-off-by: Nick Finco +Signed-off-by: Marios Pomonis +Reviewed-by: Andrew Honig +Cc: stable@vger.kernel.org +Reviewed-by: Jim Mattson +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/mtrr.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/mtrr.c ++++ b/arch/x86/kvm/mtrr.c +@@ -194,11 +194,15 @@ static bool fixed_msr_to_seg_unit(u32 ms + break; + case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000: + *seg = 1; +- *unit = msr - MSR_MTRRfix16K_80000; ++ *unit = array_index_nospec( ++ msr - MSR_MTRRfix16K_80000, ++ MSR_MTRRfix16K_A0000 - MSR_MTRRfix16K_80000 + 1); + break; + case MSR_MTRRfix4K_C0000 ... 
MSR_MTRRfix4K_F8000: + *seg = 2; +- *unit = msr - MSR_MTRRfix4K_C0000; ++ *unit = array_index_nospec( ++ msr - MSR_MTRRfix4K_C0000, ++ MSR_MTRRfix4K_F8000 - MSR_MTRRfix4K_C0000 + 1); + break; + default: + return false; diff --git a/queue-4.19/kvm-x86-protect-msr-based-index-computations-in-pmu.h-from-spectre-v1-l1tf-attacks.patch b/queue-4.19/kvm-x86-protect-msr-based-index-computations-in-pmu.h-from-spectre-v1-l1tf-attacks.patch new file mode 100644 index 00000000000..b477bcaa52a --- /dev/null +++ b/queue-4.19/kvm-x86-protect-msr-based-index-computations-in-pmu.h-from-spectre-v1-l1tf-attacks.patch @@ -0,0 +1,69 @@ +From 13c5183a4e643cc2b03a22d0e582c8e17bb7457d Mon Sep 17 00:00:00 2001 +From: Marios Pomonis +Date: Wed, 11 Dec 2019 12:47:48 -0800 +Subject: KVM: x86: Protect MSR-based index computations in pmu.h from Spectre-v1/L1TF attacks + +From: Marios Pomonis + +commit 13c5183a4e643cc2b03a22d0e582c8e17bb7457d upstream. + +This fixes a Spectre-v1/L1TF vulnerability in the get_gp_pmc() and +get_fixed_pmc() functions. +They both contain index computations based on the (attacker-controlled) +MSR number. + +Fixes: 25462f7f5295 ("KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch") + +Signed-off-by: Nick Finco +Signed-off-by: Marios Pomonis +Reviewed-by: Andrew Honig +Cc: stable@vger.kernel.org +Reviewed-by: Jim Mattson +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/pmu.h | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +--- a/arch/x86/kvm/pmu.h ++++ b/arch/x86/kvm/pmu.h +@@ -2,6 +2,8 @@ + #ifndef __KVM_X86_PMU_H + #define __KVM_X86_PMU_H + ++#include ++ + #define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu) + #define pmu_to_vcpu(pmu) (container_of((pmu), struct kvm_vcpu, arch.pmu)) + #define pmc_to_pmu(pmc) (&(pmc)->vcpu->arch.pmu) +@@ -86,8 +88,12 @@ static inline bool pmc_is_enabled(struct + static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr, + u32 base) + { +- if (msr >= base && msr < base + pmu->nr_arch_gp_counters) +- return &pmu->gp_counters[msr - base]; ++ if (msr >= base && msr < base + pmu->nr_arch_gp_counters) { ++ u32 index = array_index_nospec(msr - base, ++ pmu->nr_arch_gp_counters); ++ ++ return &pmu->gp_counters[index]; ++ } + + return NULL; + } +@@ -97,8 +103,12 @@ static inline struct kvm_pmc *get_fixed_ + { + int base = MSR_CORE_PERF_FIXED_CTR0; + +- if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) +- return &pmu->fixed_counters[msr - base]; ++ if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) { ++ u32 index = array_index_nospec(msr - base, ++ pmu->nr_arch_fixed_counters); ++ ++ return &pmu->fixed_counters[index]; ++ } + + return NULL; + } diff --git a/queue-4.19/kvm-x86-protect-pmu_intel.c-from-spectre-v1-l1tf-attacks.patch b/queue-4.19/kvm-x86-protect-pmu_intel.c-from-spectre-v1-l1tf-attacks.patch new file mode 100644 index 00000000000..fec4a1cc45e --- /dev/null +++ b/queue-4.19/kvm-x86-protect-pmu_intel.c-from-spectre-v1-l1tf-attacks.patch @@ -0,0 +1,76 @@ +From 66061740f1a487f4ed54fde75e724709f805da53 Mon Sep 17 00:00:00 2001 +From: Marios Pomonis +Date: Wed, 11 Dec 2019 12:47:53 -0800 +Subject: KVM: x86: Protect pmu_intel.c from Spectre-v1/L1TF attacks + +From: Marios Pomonis + +commit 66061740f1a487f4ed54fde75e724709f805da53 upstream. + +This fixes Spectre-v1/L1TF vulnerabilities in intel_find_fixed_event() +and intel_rdpmc_ecx_to_pmc(). 
+kvm_rdpmc() (ancestor of intel_find_fixed_event()) and +reprogram_fixed_counter() (ancestor of intel_rdpmc_ecx_to_pmc()) are +exported symbols so KVM should treat them conservatively from a security +perspective. + +Fixes: 25462f7f5295 ("KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch") + +Signed-off-by: Nick Finco +Signed-off-by: Marios Pomonis +Reviewed-by: Andrew Honig +Cc: stable@vger.kernel.org +Reviewed-by: Jim Mattson +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/pmu_intel.c | 24 ++++++++++++++++-------- + 1 file changed, 16 insertions(+), 8 deletions(-) + +--- a/arch/x86/kvm/pmu_intel.c ++++ b/arch/x86/kvm/pmu_intel.c +@@ -87,10 +87,14 @@ static unsigned intel_find_arch_event(st + + static unsigned intel_find_fixed_event(int idx) + { +- if (idx >= ARRAY_SIZE(fixed_pmc_events)) ++ u32 event; ++ size_t size = ARRAY_SIZE(fixed_pmc_events); ++ ++ if (idx >= size) + return PERF_COUNT_HW_MAX; + +- return intel_arch_events[fixed_pmc_events[idx]].event_type; ++ event = fixed_pmc_events[array_index_nospec(idx, size)]; ++ return intel_arch_events[event].event_type; + } + + /* check if a PMC is enabled by comparing it with globl_ctrl bits. */ +@@ -131,16 +135,20 @@ static struct kvm_pmc *intel_msr_idx_to_ + struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); + bool fixed = idx & (1u << 30); + struct kvm_pmc *counters; ++ unsigned int num_counters; + + idx &= ~(3u << 30); +- if (!fixed && idx >= pmu->nr_arch_gp_counters) +- return NULL; +- if (fixed && idx >= pmu->nr_arch_fixed_counters) ++ if (fixed) { ++ counters = pmu->fixed_counters; ++ num_counters = pmu->nr_arch_fixed_counters; ++ } else { ++ counters = pmu->gp_counters; ++ num_counters = pmu->nr_arch_gp_counters; ++ } ++ if (idx >= num_counters) + return NULL; +- counters = fixed ? pmu->fixed_counters : pmu->gp_counters; + *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP]; +- +- return &counters[idx]; ++ return &counters[array_index_nospec(idx, num_counters)]; + } + + static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) diff --git a/queue-4.19/kvm-x86-protect-x86_decode_insn-from-spectre-v1-l1tf-attacks.patch b/queue-4.19/kvm-x86-protect-x86_decode_insn-from-spectre-v1-l1tf-attacks.patch new file mode 100644 index 00000000000..0411c7609ac --- /dev/null +++ b/queue-4.19/kvm-x86-protect-x86_decode_insn-from-spectre-v1-l1tf-attacks.patch @@ -0,0 +1,48 @@ +From 3c9053a2cae7ba2ba73766a34cea41baa70f57f7 Mon Sep 17 00:00:00 2001 +From: Marios Pomonis +Date: Wed, 11 Dec 2019 12:47:41 -0800 +Subject: KVM: x86: Protect x86_decode_insn from Spectre-v1/L1TF attacks + +From: Marios Pomonis + +commit 3c9053a2cae7ba2ba73766a34cea41baa70f57f7 upstream. + +This fixes a Spectre-v1/L1TF vulnerability in x86_decode_insn(). +kvm_emulate_instruction() (an ancestor of x86_decode_insn()) is an exported +symbol, so KVM should treat it conservatively from a security perspective. 
+ +Fixes: 045a282ca415 ("KVM: emulator: implement fninit, fnstsw, fnstcw") + +Signed-off-by: Nick Finco +Signed-off-by: Marios Pomonis +Reviewed-by: Andrew Honig +Cc: stable@vger.kernel.org +Reviewed-by: Jim Mattson +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/emulate.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -5269,10 +5269,15 @@ done_prefixes: + } + break; + case Escape: +- if (ctxt->modrm > 0xbf) +- opcode = opcode.u.esc->high[ctxt->modrm - 0xc0]; +- else ++ if (ctxt->modrm > 0xbf) { ++ size_t size = ARRAY_SIZE(opcode.u.esc->high); ++ u32 index = array_index_nospec( ++ ctxt->modrm - 0xc0, size); ++ ++ opcode = opcode.u.esc->high[index]; ++ } else { + opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7]; ++ } + break; + case InstrDual: + if ((ctxt->modrm >> 6) == 3) diff --git a/queue-4.19/kvm-x86-refactor-picdev_write-to-prevent-spectre-v1-l1tf-attacks.patch b/queue-4.19/kvm-x86-refactor-picdev_write-to-prevent-spectre-v1-l1tf-attacks.patch new file mode 100644 index 00000000000..eb7e20daeab --- /dev/null +++ b/queue-4.19/kvm-x86-refactor-picdev_write-to-prevent-spectre-v1-l1tf-attacks.patch @@ -0,0 +1,45 @@ +From 14e32321f3606e4b0970200b6e5e47ee6f1e6410 Mon Sep 17 00:00:00 2001 +From: Marios Pomonis +Date: Wed, 11 Dec 2019 12:47:43 -0800 +Subject: KVM: x86: Refactor picdev_write() to prevent Spectre-v1/L1TF attacks + +From: Marios Pomonis + +commit 14e32321f3606e4b0970200b6e5e47ee6f1e6410 upstream. + +This fixes a Spectre-v1/L1TF vulnerability in picdev_write(). +It replaces index computations based on the (attacked-controlled) port +number with constants through a minor refactoring. + +Fixes: 85f455f7ddbe ("KVM: Add support for in-kernel PIC emulation") + +Signed-off-by: Nick Finco +Signed-off-by: Marios Pomonis +Reviewed-by: Andrew Honig +Cc: stable@vger.kernel.org +Reviewed-by: Jim Mattson +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/i8259.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/arch/x86/kvm/i8259.c ++++ b/arch/x86/kvm/i8259.c +@@ -460,10 +460,14 @@ static int picdev_write(struct kvm_pic * + switch (addr) { + case 0x20: + case 0x21: ++ pic_lock(s); ++ pic_ioport_write(&s->pics[0], addr, data); ++ pic_unlock(s); ++ break; + case 0xa0: + case 0xa1: + pic_lock(s); +- pic_ioport_write(&s->pics[addr >> 7], addr, data); ++ pic_ioport_write(&s->pics[1], addr, data); + pic_unlock(s); + break; + case 0x4d0: diff --git a/queue-4.19/kvm-x86-refactor-prefix-decoding-to-prevent-spectre-v1-l1tf-attacks.patch b/queue-4.19/kvm-x86-refactor-prefix-decoding-to-prevent-spectre-v1-l1tf-attacks.patch new file mode 100644 index 00000000000..528a8c4c7af --- /dev/null +++ b/queue-4.19/kvm-x86-refactor-prefix-decoding-to-prevent-spectre-v1-l1tf-attacks.patch @@ -0,0 +1,57 @@ +From 125ffc5e0a56a3eded608dc51e09d5ebf72cf652 Mon Sep 17 00:00:00 2001 +From: Marios Pomonis +Date: Wed, 11 Dec 2019 12:47:50 -0800 +Subject: KVM: x86: Refactor prefix decoding to prevent Spectre-v1/L1TF attacks + +From: Marios Pomonis + +commit 125ffc5e0a56a3eded608dc51e09d5ebf72cf652 upstream. + +This fixes Spectre-v1/L1TF vulnerabilities in +vmx_read_guest_seg_selector(), vmx_read_guest_seg_base(), +vmx_read_guest_seg_limit() and vmx_read_guest_seg_ar(). When +invoked from emulation, these functions contain index computations +based on the (attacker-influenced) segment value. Using constants +prevents the attack. 
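Both refactors above take the other route available against Spectre-v1: rather than clamping a computed index, they eliminate it. When the set of inputs is tiny and fixed, each case can select a compile-time constant, leaving nothing for speculation to abuse. A condensed sketch, with illustrative enum values in place of the kernel's VCPU_SREG_* constants:

    enum seg_sketch { SEG_ES, SEG_CS, SEG_SS, SEG_DS, SEG_FS, SEG_GS, SEG_NONE };

    /*
     * Before: the segment was picked via indexes derived from opcode
     * bits, such as (prefix >> 3) & 3. After: every case assigns a
     * constant, so there is no attacker-influenced index left at all.
     */
    static enum seg_sketch decode_seg_prefix(unsigned char prefix)
    {
        switch (prefix) {
        case 0x26: return SEG_ES;
        case 0x2e: return SEG_CS;
        case 0x36: return SEG_SS;
        case 0x3e: return SEG_DS;
        case 0x64: return SEG_FS;
        case 0x65: return SEG_GS;
        default:   return SEG_NONE;
        }
    }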
+ +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/emulate.c | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -5164,16 +5164,28 @@ int x86_decode_insn(struct x86_emulate_c + ctxt->ad_bytes = def_ad_bytes ^ 6; + break; + case 0x26: /* ES override */ ++ has_seg_override = true; ++ ctxt->seg_override = VCPU_SREG_ES; ++ break; + case 0x2e: /* CS override */ ++ has_seg_override = true; ++ ctxt->seg_override = VCPU_SREG_CS; ++ break; + case 0x36: /* SS override */ ++ has_seg_override = true; ++ ctxt->seg_override = VCPU_SREG_SS; ++ break; + case 0x3e: /* DS override */ + has_seg_override = true; +- ctxt->seg_override = (ctxt->b >> 3) & 3; ++ ctxt->seg_override = VCPU_SREG_DS; + break; + case 0x64: /* FS override */ ++ has_seg_override = true; ++ ctxt->seg_override = VCPU_SREG_FS; ++ break; + case 0x65: /* GS override */ + has_seg_override = true; +- ctxt->seg_override = ctxt->b & 7; ++ ctxt->seg_override = VCPU_SREG_GS; + break; + case 0x40 ... 0x4f: /* REX */ + if (mode != X86EMUL_MODE_PROT64) diff --git a/queue-4.19/media-rc-ensure-lirc-is-initialized-before-registering-input-device.patch b/queue-4.19/media-rc-ensure-lirc-is-initialized-before-registering-input-device.patch new file mode 100644 index 00000000000..bba12527fd5 --- /dev/null +++ b/queue-4.19/media-rc-ensure-lirc-is-initialized-before-registering-input-device.patch @@ -0,0 +1,145 @@ +From 080d89f522e2baddb4fbbd1af4b67b5f92537ef8 Mon Sep 17 00:00:00 2001 +From: Sean Young +Date: Thu, 21 Nov 2019 11:10:47 +0100 +Subject: media: rc: ensure lirc is initialized before registering input device + +From: Sean Young + +commit 080d89f522e2baddb4fbbd1af4b67b5f92537ef8 upstream. + +Once rc_open is called on the input device, lirc events can be delivered. +Ensure lirc is ready to do so else we might get this: + +Registered IR keymap rc-hauppauge +rc rc0: Hauppauge WinTV PVR-350 as +/devices/pci0000:00/0000:00:1e.0/0000:04:00.0/i2c-0/0-0018/rc/rc0 +input: Hauppauge WinTV PVR-350 as +/devices/pci0000:00/0000:00:1e.0/0000:04:00.0/i2c-0/0-0018/rc/rc0/input9 +BUG: kernel NULL pointer dereference, address: 0000000000000038 +PGD 0 P4D 0 +Oops: 0000 [#1] SMP PTI +CPU: 1 PID: 17 Comm: kworker/1:0 Not tainted 5.3.11-300.fc31.x86_64 #1 +Hardware name: /DG43NB, BIOS NBG4310H.86A.0096.2009.0903.1845 09/03/2009 +Workqueue: events ir_work [ir_kbd_i2c] +RIP: 0010:ir_lirc_scancode_event+0x3d/0xb0 +Code: a6 b4 07 00 00 49 81 c6 b8 07 00 00 55 53 e8 ba a7 9d ff 4c 89 +e7 49 89 45 00 e8 5e 7a 25 00 49 8b 1e 48 89 c5 4c 39 f3 74 58 <8b> 43 +38 8b 53 40 89 c1 2b 4b 3c 39 ca 72 41 21 d0 49 8b 7d 00 49 +RSP: 0018:ffffaae2000b3d88 EFLAGS: 00010017 +RAX: 0000000000000002 RBX: 0000000000000000 RCX: 0000000000000019 +RDX: 0000000000000001 RSI: 006e801b1f26ce6a RDI: ffff9e39797c37b4 +RBP: 0000000000000002 R08: 0000000000000001 R09: 0000000000000001 +R10: 0000000000000001 R11: 0000000000000001 R12: ffff9e39797c37b4 +R13: ffffaae2000b3db8 R14: ffff9e39797c37b8 R15: ffff9e39797c33d8 +FS: 0000000000000000(0000) GS:ffff9e397b680000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000000000038 CR3: 0000000035844000 CR4: 00000000000006e0 +Call Trace: +ir_do_keydown+0x8e/0x2b0 +rc_keydown+0x52/0xc0 +ir_work+0xb8/0x130 [ir_kbd_i2c] +process_one_work+0x19d/0x340 +worker_thread+0x50/0x3b0 +kthread+0xfb/0x130 +? process_one_work+0x340/0x340 +? 
kthread_park+0x80/0x80 +ret_from_fork+0x35/0x40 +Modules linked in: rc_hauppauge tuner msp3400 saa7127 saa7115 ivtv(+) +tveeprom cx2341x v4l2_common videodev mc i2c_algo_bit ir_kbd_i2c +ip_tables firewire_ohci e1000e serio_raw firewire_core ata_generic +crc_itu_t pata_acpi pata_jmicron fuse +CR2: 0000000000000038 +---[ end trace c67c2697a99fa74b ]--- +RIP: 0010:ir_lirc_scancode_event+0x3d/0xb0 +Code: a6 b4 07 00 00 49 81 c6 b8 07 00 00 55 53 e8 ba a7 9d ff 4c 89 +e7 49 89 45 00 e8 5e 7a 25 00 49 8b 1e 48 89 c5 4c 39 f3 74 58 <8b> 43 +38 8b 53 40 89 c1 2b 4b 3c 39 ca 72 41 21 d0 49 8b 7d 00 49 +RSP: 0018:ffffaae2000b3d88 EFLAGS: 00010017 +RAX: 0000000000000002 RBX: 0000000000000000 RCX: 0000000000000019 +RDX: 0000000000000001 RSI: 006e801b1f26ce6a RDI: ffff9e39797c37b4 +RBP: 0000000000000002 R08: 0000000000000001 R09: 0000000000000001 +R10: 0000000000000001 R11: 0000000000000001 R12: ffff9e39797c37b4 +R13: ffffaae2000b3db8 R14: ffff9e39797c37b8 R15: ffff9e39797c33d8 +FS: 0000000000000000(0000) GS:ffff9e397b680000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000000000038 CR3: 0000000035844000 CR4: 00000000000006e0 +rc rc0: lirc_dev: driver ir_kbd_i2c registered at minor = 0, scancode +receiver, no transmitter +tuner-simple 0-0061: creating new instance +tuner-simple 0-0061: type set to 2 (Philips NTSC (FI1236,FM1236 and +compatibles)) +ivtv0: Registered device video0 for encoder MPG (4096 kB) +ivtv0: Registered device video32 for encoder YUV (2048 kB) +ivtv0: Registered device vbi0 for encoder VBI (1024 kB) +ivtv0: Registered device video24 for encoder PCM (320 kB) +ivtv0: Registered device radio0 for encoder radio +ivtv0: Registered device video16 for decoder MPG (1024 kB) +ivtv0: Registered device vbi8 for decoder VBI (64 kB) +ivtv0: Registered device vbi16 for decoder VOUT +ivtv0: Registered device video48 for decoder YUV (1024 kB) + +Cc: stable@vger.kernel.org +Tested-by: Nick French +Reported-by: Nick French +Signed-off-by: Sean Young +Signed-off-by: Mauro Carvalho Chehab +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/media/rc/rc-main.c | 27 ++++++++++++++++----------- + 1 file changed, 16 insertions(+), 11 deletions(-) + +--- a/drivers/media/rc/rc-main.c ++++ b/drivers/media/rc/rc-main.c +@@ -1874,23 +1874,28 @@ int rc_register_device(struct rc_dev *de + + dev->registered = true; + +- if (dev->driver_type != RC_DRIVER_IR_RAW_TX) { +- rc = rc_setup_rx_device(dev); +- if (rc) +- goto out_dev; +- } +- +- /* Ensure that the lirc kfifo is setup before we start the thread */ ++ /* ++ * once the the input device is registered in rc_setup_rx_device, ++ * userspace can open the input device and rc_open() will be called ++ * as a result. This results in driver code being allowed to submit ++ * keycodes with rc_keydown, so lirc must be registered first. 
++ */ + if (dev->allowed_protocols != RC_PROTO_BIT_CEC) { + rc = ir_lirc_register(dev); + if (rc < 0) +- goto out_rx; ++ goto out_dev; ++ } ++ ++ if (dev->driver_type != RC_DRIVER_IR_RAW_TX) { ++ rc = rc_setup_rx_device(dev); ++ if (rc) ++ goto out_lirc; + } + + if (dev->driver_type == RC_DRIVER_IR_RAW) { + rc = ir_raw_event_register(dev); + if (rc < 0) +- goto out_lirc; ++ goto out_rx; + } + + dev_dbg(&dev->dev, "Registered rc%u (driver: %s)\n", dev->minor, +@@ -1898,11 +1903,11 @@ int rc_register_device(struct rc_dev *de + + return 0; + ++out_rx: ++ rc_free_rx_device(dev); + out_lirc: + if (dev->allowed_protocols != RC_PROTO_BIT_CEC) + ir_lirc_unregister(dev); +-out_rx: +- rc_free_rx_device(dev); + out_dev: + device_del(&dev->dev); + out_rx_free: diff --git a/queue-4.19/mwifiex-fix-unbalanced-locking-in-mwifiex_process_country_ie.patch b/queue-4.19/mwifiex-fix-unbalanced-locking-in-mwifiex_process_country_ie.patch new file mode 100644 index 00000000000..e740b6ec0e1 --- /dev/null +++ b/queue-4.19/mwifiex-fix-unbalanced-locking-in-mwifiex_process_country_ie.patch @@ -0,0 +1,35 @@ +From 65b1aae0d9d5962faccc06bdb8e91a2a0b09451c Mon Sep 17 00:00:00 2001 +From: Brian Norris +Date: Mon, 6 Jan 2020 14:42:12 -0800 +Subject: mwifiex: fix unbalanced locking in mwifiex_process_country_ie() + +From: Brian Norris + +commit 65b1aae0d9d5962faccc06bdb8e91a2a0b09451c upstream. + +We called rcu_read_lock(), so we need to call rcu_read_unlock() before +we return. + +Fixes: 3d94a4a8373b ("mwifiex: fix possible heap overflow in mwifiex_process_country_ie()") +Cc: stable@vger.kernel.org +Cc: huangwen +Cc: Ganapathi Bhat +Signed-off-by: Brian Norris +Acked-by: Ganapathi Bhat +Signed-off-by: Kalle Valo +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/wireless/marvell/mwifiex/sta_ioctl.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c ++++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c +@@ -232,6 +232,7 @@ static int mwifiex_process_country_ie(st + + if (country_ie_len > + (IEEE80211_COUNTRY_STRING_LEN + MWIFIEX_MAX_TRIPLET_802_11D)) { ++ rcu_read_unlock(); + mwifiex_dbg(priv->adapter, ERROR, + "11D: country_ie_len overflow!, deauth AP\n"); + return -EINVAL; diff --git a/queue-4.19/nfs-directory-page-cache-pages-need-to-be-locked-when-read.patch b/queue-4.19/nfs-directory-page-cache-pages-need-to-be-locked-when-read.patch new file mode 100644 index 00000000000..523e26a5a54 --- /dev/null +++ b/queue-4.19/nfs-directory-page-cache-pages-need-to-be-locked-when-read.patch @@ -0,0 +1,112 @@ +From 114de38225d9b300f027e2aec9afbb6e0def154b Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Sun, 2 Feb 2020 17:53:54 -0500 +Subject: NFS: Directory page cache pages need to be locked when read + +From: Trond Myklebust + +commit 114de38225d9b300f027e2aec9afbb6e0def154b upstream. + +When a NFS directory page cache page is removed from the page cache, +its contents are freed through a call to nfs_readdir_clear_array(). +To prevent the removal of the page cache entry until after we've +finished reading it, we must take the page lock. 
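The pattern, lock first and then revalidate before trusting cached state, generalizes well beyond the page cache. A runnable userspace model, where a pthread mutex and an "attached" flag stand in for the page lock and page->mapping:

    #include <pthread.h>
    #include <stdbool.h>
    #include <string.h>

    struct cache_page_sketch {
        pthread_mutex_t lock;
        bool attached;               /* cleared by eviction, under lock */
        char data[4096];
    };

    /*
     * Model of find_and_lock_cache_page(): take the lock, then confirm
     * the entry is still attached before reading its contents. Reading
     * without the lock races with eviction freeing the contents, which
     * is exactly the bug being fixed.
     */
    static int page_read_sketch(struct cache_page_sketch *p, char *out, size_t n)
    {
        pthread_mutex_lock(&p->lock);
        if (!p->attached) {
            pthread_mutex_unlock(&p->lock);
            return -1;               /* caller retries a fresh lookup */
        }
        memcpy(out, p->data, n < sizeof(p->data) ? n : sizeof(p->data));
        pthread_mutex_unlock(&p->lock);
        return 0;
    }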
+ +Fixes: 11de3b11e08c ("NFS: Fix a memory leak in nfs_readdir") +Cc: stable@vger.kernel.org # v2.6.37+ +Signed-off-by: Trond Myklebust +Reviewed-by: Benjamin Coddington +Signed-off-by: Anna Schumaker +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfs/dir.c | 30 +++++++++++++++++++----------- + 1 file changed, 19 insertions(+), 11 deletions(-) + +--- a/fs/nfs/dir.c ++++ b/fs/nfs/dir.c +@@ -701,8 +701,6 @@ int nfs_readdir_filler(nfs_readdir_descr + static + void cache_page_release(nfs_readdir_descriptor_t *desc) + { +- if (!desc->page->mapping) +- nfs_readdir_clear_array(desc->page); + put_page(desc->page); + desc->page = NULL; + } +@@ -716,19 +714,28 @@ struct page *get_cache_page(nfs_readdir_ + + /* + * Returns 0 if desc->dir_cookie was found on page desc->page_index ++ * and locks the page to prevent removal from the page cache. + */ + static +-int find_cache_page(nfs_readdir_descriptor_t *desc) ++int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc) + { + int res; + + desc->page = get_cache_page(desc); + if (IS_ERR(desc->page)) + return PTR_ERR(desc->page); +- +- res = nfs_readdir_search_array(desc); ++ res = lock_page_killable(desc->page); + if (res != 0) +- cache_page_release(desc); ++ goto error; ++ res = -EAGAIN; ++ if (desc->page->mapping != NULL) { ++ res = nfs_readdir_search_array(desc); ++ if (res == 0) ++ return 0; ++ } ++ unlock_page(desc->page); ++error: ++ cache_page_release(desc); + return res; + } + +@@ -743,7 +750,7 @@ int readdir_search_pagecache(nfs_readdir + desc->last_cookie = 0; + } + do { +- res = find_cache_page(desc); ++ res = find_and_lock_cache_page(desc); + } while (res == -EAGAIN); + return res; + } +@@ -782,7 +789,6 @@ int nfs_do_filldir(nfs_readdir_descripto + desc->eof = true; + + kunmap(desc->page); +- cache_page_release(desc); + dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", + (unsigned long long)*desc->dir_cookie, res); + return res; +@@ -828,13 +834,13 @@ int uncached_readdir(nfs_readdir_descrip + + status = nfs_do_filldir(desc); + ++ out_release: ++ nfs_readdir_clear_array(desc->page); ++ cache_page_release(desc); + out: + dfprintk(DIRCACHE, "NFS: %s: returns %d\n", + __func__, status); + return status; +- out_release: +- cache_page_release(desc); +- goto out; + } + + /* The file offset position represents the dirent entry number. A +@@ -899,6 +905,8 @@ static int nfs_readdir(struct file *file + break; + + res = nfs_do_filldir(desc); ++ unlock_page(desc->page); ++ cache_page_release(desc); + if (res < 0) + break; + } while (!desc->eof); diff --git a/queue-4.19/nfs-fix-memory-leaks-and-corruption-in-readdir.patch b/queue-4.19/nfs-fix-memory-leaks-and-corruption-in-readdir.patch new file mode 100644 index 00000000000..87c7753d9c5 --- /dev/null +++ b/queue-4.19/nfs-fix-memory-leaks-and-corruption-in-readdir.patch @@ -0,0 +1,81 @@ +From 4b310319c6a8ce708f1033d57145e2aa027a883c Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Sun, 2 Feb 2020 17:53:53 -0500 +Subject: NFS: Fix memory leaks and corruption in readdir + +From: Trond Myklebust + +commit 4b310319c6a8ce708f1033d57145e2aa027a883c upstream. + +nfs_readdir_xdr_to_array() must not exit without having initialised +the array, so that the page cache deletion routines can safely +call nfs_readdir_clear_array(). +Furthermore, we should ensure that if we exit nfs_readdir_filler() +with an error, we free up any page contents to prevent a leak +if we try to fill the page again. 
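This is the companion rule to the locking fix above: initialize a structure before any failure exit can expose it, so the teardown path is unconditionally safe. A self-contained sketch of the init-before-publish idea, with a fixed-size array in place of the readdir cache:

    #include <stdlib.h>
    #include <string.h>

    struct name_array_sketch {
        int size;
        char *names[16];
    };

    /* Safe to call on a partially filled, or never filled, array,
     * because fill() zeroes the header before its first failure exit. */
    static void name_array_clear(struct name_array_sketch *a)
    {
        for (int i = 0; i < a->size; i++)
            free(a->names[i]);
        a->size = 0;                 /* clearing twice is harmless */
    }

    static int name_array_fill(struct name_array_sketch *a, int n)
    {
        memset(a, 0, sizeof(*a));    /* init before anything can fail */
        for (int i = 0; i < n && i < 16; i++) {
            a->names[i] = strdup("entry");
            if (!a->names[i])
                return -1;           /* size counts only valid slots */
            a->size = i + 1;
        }
        return 0;
    }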
+ +Fixes: 11de3b11e08c ("NFS: Fix a memory leak in nfs_readdir") +Cc: stable@vger.kernel.org # v2.6.37+ +Signed-off-by: Trond Myklebust +Reviewed-by: Benjamin Coddington +Signed-off-by: Anna Schumaker +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nfs/dir.c | 17 +++++++++++++++-- + 1 file changed, 15 insertions(+), 2 deletions(-) + +--- a/fs/nfs/dir.c ++++ b/fs/nfs/dir.c +@@ -162,6 +162,17 @@ typedef struct { + bool eof; + } nfs_readdir_descriptor_t; + ++static ++void nfs_readdir_init_array(struct page *page) ++{ ++ struct nfs_cache_array *array; ++ ++ array = kmap_atomic(page); ++ memset(array, 0, sizeof(struct nfs_cache_array)); ++ array->eof_index = -1; ++ kunmap_atomic(array); ++} ++ + /* + * we are freeing strings created by nfs_add_to_readdir_array() + */ +@@ -174,6 +185,7 @@ void nfs_readdir_clear_array(struct page + array = kmap_atomic(page); + for (i = 0; i < array->size; i++) + kfree(array->array[i].string.name); ++ array->size = 0; + kunmap_atomic(array); + } + +@@ -610,6 +622,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir + int status = -ENOMEM; + unsigned int array_size = ARRAY_SIZE(pages); + ++ nfs_readdir_init_array(page); ++ + entry.prev_cookie = 0; + entry.cookie = desc->last_cookie; + entry.eof = 0; +@@ -626,8 +640,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir + } + + array = kmap(page); +- memset(array, 0, sizeof(struct nfs_cache_array)); +- array->eof_index = -1; + + status = nfs_readdir_alloc_pages(pages, array_size); + if (status < 0) +@@ -681,6 +693,7 @@ int nfs_readdir_filler(nfs_readdir_descr + unlock_page(page); + return 0; + error: ++ nfs_readdir_clear_array(page); + unlock_page(page); + return ret; + } diff --git a/queue-4.19/scsi-qla2xxx-fix-unbound-nvme-response-length.patch b/queue-4.19/scsi-qla2xxx-fix-unbound-nvme-response-length.patch new file mode 100644 index 00000000000..1af873492ca --- /dev/null +++ b/queue-4.19/scsi-qla2xxx-fix-unbound-nvme-response-length.patch @@ -0,0 +1,78 @@ +From 00fe717ee1ea3c2979db4f94b1533c57aed8dea9 Mon Sep 17 00:00:00 2001 +From: Arun Easi +Date: Thu, 23 Jan 2020 20:50:14 -0800 +Subject: scsi: qla2xxx: Fix unbound NVME response length + +From: Arun Easi + +commit 00fe717ee1ea3c2979db4f94b1533c57aed8dea9 upstream. + +On certain cases when response length is less than 32, NVME response data +is supplied inline in IOCB. This is indicated by some combination of state +flags. There was an instance when a high, and incorrect, response length +was indicated causing driver to overrun buffers. Fix this by checking and +limiting the response payload length. + +Fixes: 7401bc18d1ee3 ("scsi: qla2xxx: Add FC-NVMe command handling") +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20200124045014.23554-1-hmadhani@marvell.com +Signed-off-by: Arun Easi +Signed-off-by: Himanshu Madhani +Reviewed-by: Ewan D. Milne +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/qla2xxx/qla_dbg.c | 6 ------ + drivers/scsi/qla2xxx/qla_dbg.h | 6 ++++++ + drivers/scsi/qla2xxx/qla_isr.c | 12 ++++++++++++ + 3 files changed, 18 insertions(+), 6 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_dbg.c ++++ b/drivers/scsi/qla2xxx/qla_dbg.c +@@ -2520,12 +2520,6 @@ qla83xx_fw_dump_failed: + /* Driver Debug Functions. */ + /****************************************************************************/ + +-static inline int +-ql_mask_match(uint32_t level) +-{ +- return (level & ql2xextended_error_logging) == level; +-} +- + /* + * This function is for formatting and logging debug information. 
+ * It is to be used when vha is available. It formats the message +--- a/drivers/scsi/qla2xxx/qla_dbg.h ++++ b/drivers/scsi/qla2xxx/qla_dbg.h +@@ -374,3 +374,9 @@ extern int qla24xx_dump_ram(struct qla_h + extern void qla24xx_pause_risc(struct device_reg_24xx __iomem *, + struct qla_hw_data *); + extern int qla24xx_soft_reset(struct qla_hw_data *); ++ ++static inline int ++ql_mask_match(uint level) ++{ ++ return (level & ql2xextended_error_logging) == level; ++} +--- a/drivers/scsi/qla2xxx/qla_isr.c ++++ b/drivers/scsi/qla2xxx/qla_isr.c +@@ -1876,6 +1876,18 @@ static void qla24xx_nvme_iocb_entry(scsi + inbuf = (uint32_t *)&sts->nvme_ersp_data; + outbuf = (uint32_t *)fd->rspaddr; + iocb->u.nvme.rsp_pyld_len = le16_to_cpu(sts->nvme_rsp_pyld_len); ++ if (unlikely(iocb->u.nvme.rsp_pyld_len > ++ sizeof(struct nvme_fc_ersp_iu))) { ++ if (ql_mask_match(ql_dbg_io)) { ++ WARN_ONCE(1, "Unexpected response payload length %u.\n", ++ iocb->u.nvme.rsp_pyld_len); ++ ql_log(ql_log_warn, fcport->vha, 0x5100, ++ "Unexpected response payload length %u.\n", ++ iocb->u.nvme.rsp_pyld_len); ++ } ++ iocb->u.nvme.rsp_pyld_len = ++ sizeof(struct nvme_fc_ersp_iu); ++ } + iter = iocb->u.nvme.rsp_pyld_len >> 2; + for (; iter; iter--) + *outbuf++ = swab32(*inbuf++); diff --git a/queue-4.19/series b/queue-4.19/series index b69acfbdac9..c6feaab650d 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -99,3 +99,39 @@ crypto-pcrypt-do-not-clear-may_sleep-flag-in-original-request.patch crypto-atmel-aes-fix-counter-overflow-in-ctr-mode.patch crypto-api-fix-race-condition-in-crypto_spawn_alg.patch crypto-picoxcell-adjust-the-position-of-tasklet_init-and-fix-missed-tasklet_kill.patch +scsi-qla2xxx-fix-unbound-nvme-response-length.patch +nfs-fix-memory-leaks-and-corruption-in-readdir.patch +nfs-directory-page-cache-pages-need-to-be-locked-when-read.patch +jbd2_seq_info_next-should-increase-position-index.patch +btrfs-fix-missing-hole-after-hole-punching-and-fsync-when-using-no_holes.patch +btrfs-set-trans-drity-in-btrfs_commit_transaction.patch +btrfs-fix-race-between-adding-and-putting-tree-mod-seq-elements-and-nodes.patch +arm-tegra-enable-pllp-bypass-during-tegra124-lp1.patch +iwlwifi-don-t-throw-error-when-trying-to-remove-igtk.patch +mwifiex-fix-unbalanced-locking-in-mwifiex_process_country_ie.patch +sunrpc-expiry_time-should-be-seconds-not-timeval.patch +gfs2-move-setting-current-backing_dev_info.patch +gfs2-fix-o_sync-write-handling.patch +drm-rect-avoid-division-by-zero.patch +media-rc-ensure-lirc-is-initialized-before-registering-input-device.patch +tools-kvm_stat-fix-kvm_exit-filter-name.patch +xen-balloon-support-xend-based-toolstack-take-two.patch +watchdog-fix-uaf-in-reboot-notifier-handling-in-watchdog-core-code.patch +bcache-add-readahead-cache-policy-options-via-sysfs-interface.patch +eventfd-track-eventfd_signal-recursion-depth.patch +aio-prevent-potential-eventfd-recursion-on-poll.patch +kvm-x86-refactor-picdev_write-to-prevent-spectre-v1-l1tf-attacks.patch +kvm-x86-refactor-prefix-decoding-to-prevent-spectre-v1-l1tf-attacks.patch +kvm-x86-protect-pmu_intel.c-from-spectre-v1-l1tf-attacks.patch +kvm-x86-protect-dr-based-index-computations-from-spectre-v1-l1tf-attacks.patch +kvm-x86-protect-kvm_lapic_reg_write-from-spectre-v1-l1tf-attacks.patch +kvm-x86-protect-kvm_hv_msr__crash_data-from-spectre-v1-l1tf-attacks.patch +kvm-x86-protect-ioapic_write_indirect-from-spectre-v1-l1tf-attacks.patch +kvm-x86-protect-msr-based-index-computations-in-pmu.h-from-spectre-v1-l1tf-attacks.patch 
+kvm-x86-protect-ioapic_read_indirect-from-spectre-v1-l1tf-attacks.patch
+kvm-x86-protect-msr-based-index-computations-from-spectre-v1-l1tf-attacks-in-x86.c.patch
+kvm-x86-protect-x86_decode_insn-from-spectre-v1-l1tf-attacks.patch
+kvm-x86-protect-msr-based-index-computations-in-fixed_msr_to_seg_unit-from-spectre-v1-l1tf-attacks.patch
+kvm-x86-fix-potential-put_fpu-w-o-load_fpu-on-mpx-platform.patch
+kvm-ppc-book3s-hv-uninit-vcpu-if-vcore-creation-fails.patch
+kvm-ppc-book3s-pr-free-shared-page-if-mmu-initialization-fails.patch
diff --git a/queue-4.19/sunrpc-expiry_time-should-be-seconds-not-timeval.patch b/queue-4.19/sunrpc-expiry_time-should-be-seconds-not-timeval.patch
new file mode 100644
index 00000000000..34b8f9ee193
--- /dev/null
+++ b/queue-4.19/sunrpc-expiry_time-should-be-seconds-not-timeval.patch
@@ -0,0 +1,54 @@
+From 3d96208c30f84d6edf9ab4fac813306ac0d20c10 Mon Sep 17 00:00:00 2001
+From: Roberto Bergantinos Corpas
+Date: Tue, 4 Feb 2020 11:32:56 +0100
+Subject: sunrpc: expiry_time should be seconds not timeval
+
+From: Roberto Bergantinos Corpas
+
+commit 3d96208c30f84d6edf9ab4fac813306ac0d20c10 upstream.
+
+When upcalling gssproxy, cache_head.expiry_time is set as a
+timeval, not as seconds since boot. As such, the RPC cache expiry
+logic will not clean expired objects created under the
+auth.rpcsec.context cache.
+
+This has proven to cause kernel memory leaks in the field. Fix it
+by converting the expiry time to seconds since boot, using the
+64-bit variants of getboottime/timespec.
+
+Expiration times have worked this way since 2010's c5b29f885afe ("sunrpc:
+use seconds since boot in expiry cache"). The gssproxy code introduced
+in 2012 added gss_proxy_save_rsc and introduced the bug. That's a while
+for this to lurk, but it required a bit of an extreme case to make it
+obvious.
+
+Signed-off-by: Roberto Bergantinos Corpas
+Cc: stable@vger.kernel.org
+Fixes: 030d794bf498 ("SUNRPC: Use gssproxy upcall for server...")
+Tested-By: Frank Sorenson
+Signed-off-by: J. Bruce Fields
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ net/sunrpc/auth_gss/svcauth_gss.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/sunrpc/auth_gss/svcauth_gss.c
++++ b/net/sunrpc/auth_gss/svcauth_gss.c
+@@ -1224,6 +1224,7 @@ static int gss_proxy_save_rsc(struct cac
+ dprintk("RPC: No creds found!\n");
+ goto out;
+ } else {
++ struct timespec64 boot;
+
+ /* steal creds */
+ rsci.cred = ud->creds;
+@@ -1244,6 +1245,9 @@ static int gss_proxy_save_rsc(struct cac
+ &expiry, GFP_KERNEL);
+ if (status)
+ goto out;
++
++ getboottime64(&boot);
++ expiry -= boot.tv_sec;
+ }
+
+ rsci.h.expiry_time = expiry;
diff --git a/queue-4.19/tools-kvm_stat-fix-kvm_exit-filter-name.patch b/queue-4.19/tools-kvm_stat-fix-kvm_exit-filter-name.patch
new file mode 100644
index 00000000000..27ac792eed1
--- /dev/null
+++ b/queue-4.19/tools-kvm_stat-fix-kvm_exit-filter-name.patch
@@ -0,0 +1,73 @@
+From 5fcf3a55a62afb0760ccb6f391d62f20bce4a42f Mon Sep 17 00:00:00 2001
+From: Gavin Shan
+Date: Tue, 10 Dec 2019 15:48:29 +1100
+Subject: tools/kvm_stat: Fix kvm_exit filter name
+
+From: Gavin Shan
+
+commit 5fcf3a55a62afb0760ccb6f391d62f20bce4a42f upstream.
+
+The filter name is fixed to "exit_reason" for some kvm_exit events, no
+matter what architecture we are on. Actually, the filter name
+("exit_reason") is only applicable to x86, meaning it is broken on other
+architectures, including aarch64.
+
+This fixes the issue by providing different kvm_exit filter names,
+depending on the architecture we are on. Afterwards, the matching filter
+name is picked and applied through ioctl(fd, SET_FILTER).
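+
+In C terms, the change boils down to a per-architecture lookup table.
+The following is a minimal standalone sketch (hypothetical names, not
+part of the tool); only the filter field values are taken from this
+patch, and the machine-name keys are assumed uname -m style strings:
+
+    #include <stdio.h>
+    #include <string.h>
+
+    struct arch_filter {
+        const char *arch;  /* machine name, as from uname -m (assumed) */
+        const char *field; /* kvm_exit filter field, NULL if none */
+    };
+
+    static const struct arch_filter arch_filters[] = {
+        { "x86_64",  "exit_reason" },
+        { "ppc64",   "exit_nr" },
+        { "aarch64", "esr_ec" },
+        { "s390x",   NULL },
+    };
+
+    static const char *exit_reason_field(const char *arch)
+    {
+        size_t i;
+
+        /* Linear scan is fine for a handful of architectures. */
+        for (i = 0; i < sizeof(arch_filters) / sizeof(arch_filters[0]); i++)
+            if (strcmp(arch_filters[i].arch, arch) == 0)
+                return arch_filters[i].field;
+        return NULL;
+    }
+
+    int main(void)
+    {
+        /* Only build a kvm_exit filter when the field exists. */
+        const char *field = exit_reason_field("aarch64");
+
+        printf("%s\n", field ? field : "(no kvm_exit filter)");
+        return 0;
+    }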
+
+Reported-by: Andrew Jones
+Signed-off-by: Gavin Shan
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ tools/kvm/kvm_stat/kvm_stat | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/tools/kvm/kvm_stat/kvm_stat
++++ b/tools/kvm/kvm_stat/kvm_stat
+@@ -271,6 +271,7 @@ class ArchX86(Arch):
+ def __init__(self, exit_reasons):
+ self.sc_perf_evt_open = 298
+ self.ioctl_numbers = IOCTL_NUMBERS
++ self.exit_reason_field = 'exit_reason'
+ self.exit_reasons = exit_reasons
+
+ def debugfs_is_child(self, field):
+@@ -290,6 +291,7 @@ class ArchPPC(Arch):
+ # numbers depend on the wordsize.
+ char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
+ self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
++ self.exit_reason_field = 'exit_nr'
+ self.exit_reasons = {}
+
+ def debugfs_is_child(self, field):
+@@ -301,6 +303,7 @@ class ArchA64(Arch):
+ def __init__(self):
+ self.sc_perf_evt_open = 241
+ self.ioctl_numbers = IOCTL_NUMBERS
++ self.exit_reason_field = 'esr_ec'
+ self.exit_reasons = AARCH64_EXIT_REASONS
+
+ def debugfs_is_child(self, field):
+@@ -312,6 +315,7 @@ class ArchS390(Arch):
+ def __init__(self):
+ self.sc_perf_evt_open = 331
+ self.ioctl_numbers = IOCTL_NUMBERS
++ self.exit_reason_field = None
+ self.exit_reasons = None
+
+ def debugfs_is_child(self, field):
+@@ -542,8 +546,8 @@ class TracepointProvider(Provider):
+ """
+ filters = {}
+ filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS)
+- if ARCH.exit_reasons:
+- filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
++ if ARCH.exit_reason_field and ARCH.exit_reasons:
++ filters['kvm_exit'] = (ARCH.exit_reason_field, ARCH.exit_reasons)
+ return filters
+
+ def _get_available_fields(self):
diff --git a/queue-4.19/watchdog-fix-uaf-in-reboot-notifier-handling-in-watchdog-core-code.patch b/queue-4.19/watchdog-fix-uaf-in-reboot-notifier-handling-in-watchdog-core-code.patch
new file mode 100644
index 00000000000..ca35261e3dd
--- /dev/null
+++ b/queue-4.19/watchdog-fix-uaf-in-reboot-notifier-handling-in-watchdog-core-code.patch
@@ -0,0 +1,197 @@
+From 69503e585192fdd84b240f18a0873d20e18a2e0a Mon Sep 17 00:00:00 2001
+From: Vladis Dronov
+Date: Wed, 8 Jan 2020 13:53:47 +0100
+Subject: watchdog: fix UAF in reboot notifier handling in watchdog core code
+
+From: Vladis Dronov
+
+commit 69503e585192fdd84b240f18a0873d20e18a2e0a upstream.
+
+After commit 44ea39420fc9 ("drivers/watchdog: make use of
+devm_register_reboot_notifier()"), the struct notifier_block reboot_nb in
+the struct watchdog_device is removed from the reboot notifier chain at
+the time the watchdog's chardev is closed. But at least in the i6300esb.c
+case, reboot_nb is embedded in struct esb_dev, which can be freed on
+device removal, before the chardev is closed, resulting in a
+use-after-free (UAF) at reboot:
+
+[ 7.728581] esb_probe: esb_dev.watchdog_device ffff91316f91ab28
+ts# uname -r note the address ^^^
+5.5.0-rc5-ae6088-wdog
+ts# ./openwdog0 &
+[1] 696
+ts# opened /dev/watchdog0, sleeping 10s...
+ts# echo 1 > /sys/devices/pci0000\:00/0000\:00\:09.0/remove
+[ 178.086079] devres:rel_nodes: dev ffff91317668a0b0 data ffff91316f91ab28
+ esb_dev.watchdog_device.reboot_nb memory is freed here ^^^
+ts# ...woken up
+[ 181.459010] devres:rel_nodes: dev ffff913171781000 data ffff913174a1dae8
+[ 181.460195] devm_unreg_reboot_notifier: res ffff913174a1dae8 nb ffff91316f91ab78
+ attempt to use memory already freed ^^^
+[ 181.461063] devm_unreg_reboot_notifier: nb->call 6b6b6b6b6b6b6b6b
+[ 181.461243] devm_unreg_reboot_notifier: nb->next 6b6b6b6b6b6b6b6b
+ freed memory is filled with a slub poison ^^^
+[1]+ Done ./openwdog0
+ts# reboot
+[ 229.921862] systemd-shutdown[1]: Rebooting.
+[ 229.939265] notifier_call_chain: nb ffffffff9c6c2f20 nb->next ffffffff9c6d50c0
+[ 229.943080] notifier_call_chain: nb ffffffff9c6d50c0 nb->next 6b6b6b6b6b6b6b6b
+[ 229.946054] notifier_call_chain: nb 6b6b6b6b6b6b6b6b INVAL
+[ 229.957584] general protection fault: 0000 [#1] SMP
+[ 229.958770] CPU: 0 PID: 1 Comm: systemd-shutdow Not tainted 5.5.0-rc5-ae6088-wdog
+[ 229.960224] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ...
+[ 229.963288] RIP: 0010:notifier_call_chain+0x66/0xd0
+[ 229.969082] RSP: 0018:ffffb20dc0013d88 EFLAGS: 00010246
+[ 229.970812] RAX: 000000000000002e RBX: 6b6b6b6b6b6b6b6b RCX: 00000000000008b3
+[ 229.972929] RDX: 0000000000000000 RSI: 0000000000000096 RDI: ffffffff9ccc46ac
+[ 229.975028] RBP: 0000000000000001 R08: 0000000000000000 R09: 00000000000008b3
+[ 229.977039] R10: 0000000000000001 R11: ffffffff9c26c740 R12: 0000000000000000
+[ 229.979155] R13: 6b6b6b6b6b6b6b6b R14: 0000000000000000 R15: 00000000fffffffa
+... slub_debug=FZP poison ^^^
+[ 229.989089] Call Trace:
+[ 229.990157] blocking_notifier_call_chain+0x43/0x59
+[ 229.991401] kernel_restart_prepare+0x14/0x30
+[ 229.992607] kernel_restart+0x9/0x30
+[ 230.000149] __do_sys_reboot+0x1d2/0x210
+[ 230.001277] do_syscall_64+0x3d/0x130
+[ 230.002639] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[ 230.016402] RIP: 0033:0x7f5461bdd177
+[ 230.050261] Modules linked in: i6300esb
+[ 230.050261] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
+
+Fix the crash by reverting 44ea39420fc9 so that unregister_reboot_notifier()
+is called when the watchdog device is removed. This also unifies the
+handling of the reboot notifier with that of the restart handler, which
+is freed with unregister_restart_handler() in the same place.
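+
+The underlying pattern is generic: a notifier_block embedded in an
+object must leave the notifier chain before that object is freed. A
+minimal sketch of the safe ordering follows (hypothetical driver, not
+code from this patch):
+
+    #include <linux/notifier.h>
+    #include <linux/reboot.h>
+    #include <linux/slab.h>
+
+    /* Hypothetical driver data with an embedded notifier_block,
+     * analogous to i6300esb's struct esb_dev. */
+    struct my_wdt {
+        struct notifier_block reboot_nb;
+    };
+
+    static int my_reboot_notify(struct notifier_block *nb,
+                                unsigned long code, void *data)
+    {
+        /* container_of(nb, struct my_wdt, reboot_nb) is only valid
+         * while the surrounding object is still allocated. */
+        return NOTIFY_DONE;
+    }
+
+    static int my_wdt_probe(struct my_wdt *wdt)
+    {
+        wdt->reboot_nb.notifier_call = my_reboot_notify;
+        return register_reboot_notifier(&wdt->reboot_nb);
+    }
+
+    static void my_wdt_remove(struct my_wdt *wdt)
+    {
+        /* Unregister before kfree(); a devm-managed unregister tied
+         * to another lifetime can run after the kfree() below and
+         * walk freed memory at reboot. */
+        unregister_reboot_notifier(&wdt->reboot_nb);
+        kfree(wdt);
+    }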
+
+Fixes: 44ea39420fc9 ("drivers/watchdog: make use of devm_register_reboot_notifier()")
+Cc: stable@vger.kernel.org # v4.15+
+Signed-off-by: Vladis Dronov
+Reviewed-by: Guenter Roeck
+Link: https://lore.kernel.org/r/20200108125347.6067-1-vdronov@redhat.com
+Signed-off-by: Guenter Roeck
+Signed-off-by: Wim Van Sebroeck
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/watchdog/watchdog_core.c | 35 +++++++++++++++++++++++++++++++++++
+ drivers/watchdog/watchdog_dev.c | 36 +----------------------------------
+ 2 files changed, 36 insertions(+), 35 deletions(-)
+
+--- a/drivers/watchdog/watchdog_core.c
++++ b/drivers/watchdog/watchdog_core.c
+@@ -138,6 +138,25 @@ int watchdog_init_timeout(struct watchdo
+ }
+ EXPORT_SYMBOL_GPL(watchdog_init_timeout);
+
++static int watchdog_reboot_notifier(struct notifier_block *nb,
++ unsigned long code, void *data)
++{
++ struct watchdog_device *wdd;
++
++ wdd = container_of(nb, struct watchdog_device, reboot_nb);
++ if (code == SYS_DOWN || code == SYS_HALT) {
++ if (watchdog_active(wdd)) {
++ int ret;
++
++ ret = wdd->ops->stop(wdd);
++ if (ret)
++ return NOTIFY_BAD;
++ }
++ }
++
++ return NOTIFY_DONE;
++}
++
+ static int watchdog_restart_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+ {
+@@ -226,6 +245,19 @@ static int __watchdog_register_device(st
+ }
+ }
+
++ if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) {
++ wdd->reboot_nb.notifier_call = watchdog_reboot_notifier;
++
++ ret = register_reboot_notifier(&wdd->reboot_nb);
++ if (ret) {
++ pr_err("watchdog%d: Cannot register reboot notifier (%d)\n",
++ wdd->id, ret);
++ watchdog_dev_unregister(wdd);
++ ida_simple_remove(&watchdog_ida, id);
++ return ret;
++ }
++ }
++
+ if (wdd->ops->restart) {
+ wdd->restart_nb.notifier_call = watchdog_restart_notifier;
+
+@@ -271,6 +303,9 @@ static void __watchdog_unregister_device
+ if (wdd->ops->restart)
+ unregister_restart_handler(&wdd->restart_nb);
+
++ if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status))
++ unregister_reboot_notifier(&wdd->reboot_nb);
++
+ watchdog_dev_unregister(wdd);
+ ida_simple_remove(&watchdog_ida, wdd->id);
+ }
+--- a/drivers/watchdog/watchdog_dev.c
++++ b/drivers/watchdog/watchdog_dev.c
+@@ -42,7 +42,6 @@
+ #include <linux/miscdevice.h> /* For handling misc devices */
+ #include <linux/module.h> /* For module stuff/... */
+ #include <linux/mutex.h> /* For mutexes */
+-#include <linux/reboot.h> /* For reboot notifier */
+ #include <linux/slab.h> /* For memory functions */
+ #include <linux/types.h> /* For standard types (like size_t) */
+ #include <linux/watchdog.h> /* For watchdog specific items */
+@@ -1048,25 +1047,6 @@ static void watchdog_cdev_unregister(str
+ put_device(&wd_data->dev);
+ }
+
+-static int watchdog_reboot_notifier(struct notifier_block *nb,
+- unsigned long code, void *data)
+-{
+- struct watchdog_device *wdd;
+-
+- wdd = container_of(nb, struct watchdog_device, reboot_nb);
+- if (code == SYS_DOWN || code == SYS_HALT) {
+- if (watchdog_active(wdd)) {
+- int ret;
+-
+- ret = wdd->ops->stop(wdd);
+- if (ret)
+- return NOTIFY_BAD;
+- }
+- }
+-
+- return NOTIFY_DONE;
+-}
+-
+ /*
+ * watchdog_dev_register: register a watchdog device
+ * @wdd: watchdog device
+@@ -1085,22 +1065,8 @@ int watchdog_dev_register(struct watchdo
+ return ret;
+
+ ret = watchdog_register_pretimeout(wdd);
+- if (ret) {
++ if (ret)
+ watchdog_cdev_unregister(wdd);
+- return ret;
+- }
+-
+- if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) {
+- wdd->reboot_nb.notifier_call = watchdog_reboot_notifier;
+-
+- ret = devm_register_reboot_notifier(&wdd->wd_data->dev,
+- &wdd->reboot_nb);
+- if (ret) {
+- pr_err("watchdog%d: Cannot register reboot notifier (%d)\n",
+- wdd->id, ret);
+- watchdog_dev_unregister(wdd);
+- }
+- }
+
+ return ret;
+ }
diff --git a/queue-4.19/xen-balloon-support-xend-based-toolstack-take-two.patch b/queue-4.19/xen-balloon-support-xend-based-toolstack-take-two.patch
new file mode 100644
index 00000000000..9895c98954c
--- /dev/null
+++ b/queue-4.19/xen-balloon-support-xend-based-toolstack-take-two.patch
@@ -0,0 +1,47 @@
+From eda4eabf86fd6806eaabc23fb90dd056fdac037b Mon Sep 17 00:00:00 2001
+From: Juergen Gross
+Date: Fri, 17 Jan 2020 14:49:31 +0100
+Subject: xen/balloon: Support xend-based toolstack take two
+
+From: Juergen Gross
+
+commit eda4eabf86fd6806eaabc23fb90dd056fdac037b upstream.
+
+Commit 3aa6c19d2f38be ("xen/balloon: Support xend-based toolstack")
+tried to fix a regression with running on rather ancient Xen versions.
+Unfortunately the fix was based on the assumption that xend would
+just use another Xenstore node, but in reality only some downstream
+versions of xend do that. The upstream xend does not write that
+Xenstore node at all, so the problem must be fixed in another way.
+
+The easiest way to achieve that is to fall back to the behavior
+before commit 96edd61dcf4436 ("xen/balloon: don't online new memory
+initially") in case the static memory maximum can't be read.
+
+This is achieved by setting static_max to the current number of
+memory pages known by the system, resulting in target_diff becoming
+zero.
+
+Fixes: 3aa6c19d2f38be ("xen/balloon: Support xend-based toolstack")
+Signed-off-by: Juergen Gross
+Reviewed-by: Boris Ostrovsky
+Cc: stable@vger.kernel.org # 4.13
+Signed-off-by: Boris Ostrovsky
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/xen/xen-balloon.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/xen/xen-balloon.c
++++ b/drivers/xen/xen-balloon.c
+@@ -83,7 +83,7 @@ static void watch_target(struct xenbus_w
+ "%llu", &static_max) == 1))
+ static_max >>= PAGE_SHIFT - 10;
+ else
+- static_max = new_target;
++ static_max = balloon_stats.current_pages;
+
+ target_diff = (xen_pv_domain() || xen_initial_domain()) ? 0
+ : static_max - balloon_stats.target_pages;
--
2.47.3