From 225645830d034a04c5148069938136861df9392e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Apr 2020 16:23:21 +0200 Subject: [PATCH] 5.6-stable patches added patches: btrfs-don-t-submit-any-btree-write-bio-if-the-fs-has-errors.patch btrfs-drop-block-from-cache-on-error-in-relocation.patch btrfs-fix-btrfs_calc_reclaim_metadata_size-calculation.patch btrfs-fix-crash-during-unmount-due-to-race-with-delayed-inode-workers.patch btrfs-fix-missing-file-extent-item-for-hole-after-ranged-fsync.patch btrfs-fix-missing-semaphore-unlock-in-btrfs_sync_file.patch btrfs-reloc-clean-dirty-subvols-if-we-fail-to-start-a-transaction.patch btrfs-set-update-the-uuid-generation-as-soon-as-possible.patch btrfs-unset-reloc-control-if-we-fail-to-recover.patch btrfs-use-nofs-allocations-for-running-delayed-items.patch cifs-check-new-file-size-when-extending-file-by-fallocate.patch cifs-fix-bug-which-the-return-value-by-asynchronous-read-is-error.patch drm-i915-gen12-disable-preemption-timeout.patch kvm-nvmx-properly-handle-userspace-interrupt-window-request.patch kvm-ppc-book3s-hv-skip-kvmppc_uvmem_free-if-ultravisor-is-not-supported.patch kvm-s390-vsie-fix-delivery-of-addressing-exceptions.patch kvm-s390-vsie-fix-region-1-asce-sanity-shadow-address-checks.patch kvm-vmx-add-a-trampoline-to-fix-vmread-error-handling.patch kvm-vmx-always-vmclear-in-use-vmcses-during-crash-with-kexec-support.patch kvm-vmx-fix-crash-cleanup-when-kvm-wasn-t-used.patch kvm-x86-allocate-new-rmap-and-large-page-tracking-when-moving-memslot.patch kvm-x86-gracefully-handle-__vmalloc-failure-during-vm-allocation.patch mtd-rawnand-cadence-change-bad-block-marker-size.patch mtd-rawnand-cadence-fix-the-calculation-of-the-avaialble-oob-size.patch mtd-rawnand-cadence-reinit-completion-before-executing-a-new-command.patch mtd-spinand-do-not-erase-the-block-before-writing-a-bad-block-marker.patch mtd-spinand-stop-using-spinand-oobbuf-for-buffering-bad-block-markers.patch platform-x86-asus-wmi-support-laptops-where-the-first-battery-is-named-batt.patch remoteproc-fix-null-pointer-dereference-in-rproc_virtio_notify.patch remoteproc-qcom_q6v5_mss-don-t-reassign-mpss-region-on-shutdown.patch remoteproc-qcom_q6v5_mss-reload-the-mba-region-on-coredump.patch smb3-fix-performance-regression-with-setting-mtime.patch --- ...btree-write-bio-if-the-fs-has-errors.patch | 227 +++++++++++++++++ ...ck-from-cache-on-error-in-relocation.patch | 41 ++++ ...lc_reclaim_metadata_size-calculation.patch | 122 +++++++++ ...e-to-race-with-delayed-inode-workers.patch | 222 +++++++++++++++++ ...ent-item-for-hole-after-ranged-fsync.patch | 103 ++++++++ ...-semaphore-unlock-in-btrfs_sync_file.patch | 35 +++ ...ls-if-we-fail-to-start-a-transaction.patch | 51 ++++ ...-uuid-generation-as-soon-as-possible.patch | 64 +++++ ...-reloc-control-if-we-fail-to-recover.patch | 70 ++++++ ...llocations-for-running-delayed-items.patch | 232 ++++++++++++++++++ ...ize-when-extending-file-by-fallocate.patch | 40 +++ ...-value-by-asynchronous-read-is-error.patch | 63 +++++ ...915-gen12-disable-preemption-timeout.patch | 96 ++++++++ ...e-userspace-interrupt-window-request.patch | 163 ++++++++++++ ..._free-if-ultravisor-is-not-supported.patch | 71 ++++++ ...ix-delivery-of-addressing-exceptions.patch | 50 ++++ ...-1-asce-sanity-shadow-address-checks.patch | 56 +++++ ...mpoline-to-fix-vmread-error-handling.patch | 149 +++++++++++ ...cses-during-crash-with-kexec-support.patch | 180 ++++++++++++++ ...x-crash-cleanup-when-kvm-wasn-t-used.patch | 75 ++++++ 
...ge-page-tracking-when-moving-memslot.patch | 102 ++++++++ ...vmalloc-failure-during-vm-allocation.patch | 50 ++++ ...cadence-change-bad-block-marker-size.patch | 42 ++++ ...alculation-of-the-avaialble-oob-size.patch | 43 ++++ ...etion-before-executing-a-new-command.patch | 33 +++ ...ck-before-writing-a-bad-block-marker.patch | 50 ++++ ...bbuf-for-buffering-bad-block-markers.patch | 85 +++++++ ...here-the-first-battery-is-named-batt.patch | 41 ++++ ...r-dereference-in-rproc_virtio_notify.patch | 122 +++++++++ ...n-t-reassign-mpss-region-on-shutdown.patch | 100 ++++++++ ...ss-reload-the-mba-region-on-coredump.patch | 67 +++++ queue-5.6/series | 32 +++ ...rmance-regression-with-setting-mtime.patch | 65 +++++ 33 files changed, 2942 insertions(+) create mode 100644 queue-5.6/btrfs-don-t-submit-any-btree-write-bio-if-the-fs-has-errors.patch create mode 100644 queue-5.6/btrfs-drop-block-from-cache-on-error-in-relocation.patch create mode 100644 queue-5.6/btrfs-fix-btrfs_calc_reclaim_metadata_size-calculation.patch create mode 100644 queue-5.6/btrfs-fix-crash-during-unmount-due-to-race-with-delayed-inode-workers.patch create mode 100644 queue-5.6/btrfs-fix-missing-file-extent-item-for-hole-after-ranged-fsync.patch create mode 100644 queue-5.6/btrfs-fix-missing-semaphore-unlock-in-btrfs_sync_file.patch create mode 100644 queue-5.6/btrfs-reloc-clean-dirty-subvols-if-we-fail-to-start-a-transaction.patch create mode 100644 queue-5.6/btrfs-set-update-the-uuid-generation-as-soon-as-possible.patch create mode 100644 queue-5.6/btrfs-unset-reloc-control-if-we-fail-to-recover.patch create mode 100644 queue-5.6/btrfs-use-nofs-allocations-for-running-delayed-items.patch create mode 100644 queue-5.6/cifs-check-new-file-size-when-extending-file-by-fallocate.patch create mode 100644 queue-5.6/cifs-fix-bug-which-the-return-value-by-asynchronous-read-is-error.patch create mode 100644 queue-5.6/drm-i915-gen12-disable-preemption-timeout.patch create mode 100644 queue-5.6/kvm-nvmx-properly-handle-userspace-interrupt-window-request.patch create mode 100644 queue-5.6/kvm-ppc-book3s-hv-skip-kvmppc_uvmem_free-if-ultravisor-is-not-supported.patch create mode 100644 queue-5.6/kvm-s390-vsie-fix-delivery-of-addressing-exceptions.patch create mode 100644 queue-5.6/kvm-s390-vsie-fix-region-1-asce-sanity-shadow-address-checks.patch create mode 100644 queue-5.6/kvm-vmx-add-a-trampoline-to-fix-vmread-error-handling.patch create mode 100644 queue-5.6/kvm-vmx-always-vmclear-in-use-vmcses-during-crash-with-kexec-support.patch create mode 100644 queue-5.6/kvm-vmx-fix-crash-cleanup-when-kvm-wasn-t-used.patch create mode 100644 queue-5.6/kvm-x86-allocate-new-rmap-and-large-page-tracking-when-moving-memslot.patch create mode 100644 queue-5.6/kvm-x86-gracefully-handle-__vmalloc-failure-during-vm-allocation.patch create mode 100644 queue-5.6/mtd-rawnand-cadence-change-bad-block-marker-size.patch create mode 100644 queue-5.6/mtd-rawnand-cadence-fix-the-calculation-of-the-avaialble-oob-size.patch create mode 100644 queue-5.6/mtd-rawnand-cadence-reinit-completion-before-executing-a-new-command.patch create mode 100644 queue-5.6/mtd-spinand-do-not-erase-the-block-before-writing-a-bad-block-marker.patch create mode 100644 queue-5.6/mtd-spinand-stop-using-spinand-oobbuf-for-buffering-bad-block-markers.patch create mode 100644 queue-5.6/platform-x86-asus-wmi-support-laptops-where-the-first-battery-is-named-batt.patch create mode 100644 queue-5.6/remoteproc-fix-null-pointer-dereference-in-rproc_virtio_notify.patch create mode 100644 
queue-5.6/remoteproc-qcom_q6v5_mss-don-t-reassign-mpss-region-on-shutdown.patch create mode 100644 queue-5.6/remoteproc-qcom_q6v5_mss-reload-the-mba-region-on-coredump.patch create mode 100644 queue-5.6/smb3-fix-performance-regression-with-setting-mtime.patch diff --git a/queue-5.6/btrfs-don-t-submit-any-btree-write-bio-if-the-fs-has-errors.patch b/queue-5.6/btrfs-don-t-submit-any-btree-write-bio-if-the-fs-has-errors.patch new file mode 100644 index 00000000000..2da17499ae6 --- /dev/null +++ b/queue-5.6/btrfs-don-t-submit-any-btree-write-bio-if-the-fs-has-errors.patch @@ -0,0 +1,227 @@ +From b3ff8f1d380e65dddd772542aa9bff6c86bf715a Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Wed, 12 Feb 2020 14:12:44 +0800 +Subject: btrfs: Don't submit any btree write bio if the fs has errors + +From: Qu Wenruo + +commit b3ff8f1d380e65dddd772542aa9bff6c86bf715a upstream. + +[BUG] +There is a fuzzed image which can cause a KASAN report at unmount time. + + BUG: KASAN: use-after-free in btrfs_queue_work+0x2c1/0x390 + Read of size 8 at addr ffff888067cf6848 by task umount/1922 + + CPU: 0 PID: 1922 Comm: umount Tainted: G W 5.0.21 #1 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 + Call Trace: + dump_stack+0x5b/0x8b + print_address_description+0x70/0x280 + kasan_report+0x13a/0x19b + btrfs_queue_work+0x2c1/0x390 + btrfs_wq_submit_bio+0x1cd/0x240 + btree_submit_bio_hook+0x18c/0x2a0 + submit_one_bio+0x1be/0x320 + flush_write_bio.isra.41+0x2c/0x70 + btree_write_cache_pages+0x3bb/0x7f0 + do_writepages+0x5c/0x130 + __writeback_single_inode+0xa3/0x9a0 + writeback_single_inode+0x23d/0x390 + write_inode_now+0x1b5/0x280 + iput+0x2ef/0x600 + close_ctree+0x341/0x750 + generic_shutdown_super+0x126/0x370 + kill_anon_super+0x31/0x50 + btrfs_kill_super+0x36/0x2b0 + deactivate_locked_super+0x80/0xc0 + deactivate_super+0x13c/0x150 + cleanup_mnt+0x9a/0x130 + task_work_run+0x11a/0x1b0 + exit_to_usermode_loop+0x107/0x130 + do_syscall_64+0x1e5/0x280 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +[CAUSE] +The fuzzed image has a completely screwed up extent tree: + + leaf 29421568 gen 8 total ptrs 6 free space 3587 owner EXTENT_TREE + refs 2 lock (w:0 r:0 bw:0 br:0 sw:0 sr:0) lock_owner 0 current 5938 + item 0 key (12587008 168 4096) itemoff 3942 itemsize 53 + extent refs 1 gen 9 flags 1 + ref#0: extent data backref root 5 objectid 259 offset 0 count 1 + item 1 key (12591104 168 8192) itemoff 3889 itemsize 53 + extent refs 1 gen 9 flags 1 + ref#0: extent data backref root 5 objectid 271 offset 0 count 1 + item 2 key (12599296 168 4096) itemoff 3836 itemsize 53 + extent refs 1 gen 9 flags 1 + ref#0: extent data backref root 5 objectid 259 offset 4096 count 1 + item 3 key (29360128 169 0) itemoff 3803 itemsize 33 + extent refs 1 gen 9 flags 2 + ref#0: tree block backref root 5 + item 4 key (29368320 169 1) itemoff 3770 itemsize 33 + extent refs 1 gen 9 flags 2 + ref#0: tree block backref root 5 + item 5 key (29372416 169 0) itemoff 3737 itemsize 33 + extent refs 1 gen 9 flags 2 + ref#0: tree block backref root 5 + +Note that leaf 29421568 doesn't have its backref in the extent tree. +Thus the extent allocator can re-allocate leaf 29421568 for other trees. + +In short, the bug is caused by: + +- Existing tree block gets allocated to log tree + This got its generation bumped. + +- Log tree balance cleaned dirty bit of offending tree block + It will not be written back to disk, thus no WRITTEN flag.
+ +- Original owner of the tree block gets COWed + Since the tree block has higher transid, no WRITTEN flag, it's reused, + and not traced by transaction::dirty_pages. + +- Transaction aborted + Tree blocks get cleaned according to transaction::dirty_pages. But the + offending tree block is not recorded at all. + +- Filesystem unmount + All pages are assumed to be clean, all workqueues are destroyed, then + iput(btree_inode) is called. + But the offending tree block is still dirty, which triggers writeback, and + causes a use-after-free bug. + +The detailed sequence looks like this: + +- Initial status + eb: 29421568, header=WRITTEN bflags_dirty=0, page_dirty=0, gen=8, + not traced by any dirty extent_io_tree. + +- New tree block is allocated + Since there is no backref for 29421568, it's re-allocated as a new tree + block. + Keep in mind that tree block 29421568 is still referred to by the extent + tree. + +- Tree block 29421568 is filled for log tree + eb: 29421568, header=0 bflags_dirty=1, page_dirty=1, gen=9 << (gen bumped) + traced by btrfs_root::dirty_log_pages + +- Some log tree operations + Since the fs is using node size 4096, the log tree can easily go a + level higher. + +- Log tree needs balance + Tree block 29421568 gets all its content pushed to the right, thus now + it is empty, and we don't need it. + btrfs_clean_tree_block() from __push_leaf_right() gets called. + + eb: 29421568, header=0 bflags_dirty=0, page_dirty=0, gen=9 + traced by btrfs_root::dirty_log_pages + +- Log tree write back + btree_write_cache_pages() goes through dirty page ranges, but since the + page of tree block 29421568 got cleaned already, it's not written + back to disk. Thus it doesn't have the WRITTEN bit set. + But ranges in dirty_log_pages are cleared. + + eb: 29421568, header=0 bflags_dirty=0, page_dirty=0, gen=9 + not traced by any dirty extent_io_tree. + +- Extent tree update when committing transaction + Since tree block 29421568 has a transid equal to the running trans, and has + no WRITTEN bit, should_cow_block() will use it directly without adding + it to btrfs_transaction::dirty_pages. + + eb: 29421568, header=0 bflags_dirty=1, page_dirty=1, gen=9 + not traced by any dirty extent_io_tree. + + At this stage, we're doomed. We have a dirty eb not tracked by any + extent io tree. + +- Transaction gets aborted due to corrupted extent tree + Btrfs cleans up dirty pages according to transaction::dirty_pages and + btrfs_root::dirty_log_pages. + But since tree block 29421568 is not tracked by either of them, it's + still dirty. + + eb: 29421568, header=0 bflags_dirty=1, page_dirty=1, gen=9 + not traced by any dirty extent_io_tree. + +- Filesystem unmount + Since all cleanup is assumed to be done, all workqueues are destroyed. + Then iput(btree_inode) is called, expecting no dirty pages. + But tree block 29421568 is still dirty, thus triggering writeback. + Since all workqueues are already freed, we cause a use-after-free. + +This shows us that log tree blocks plus a bad extent tree can cause wild +dirty pages. + +[FIX] +To fix the problem, don't submit any btree write bio if the filesystem +has any error. This is the last safety net, just in case other cleanup +paths haven't caught it.
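+
+In outline (a condensed restatement of the hunk added below to
+btree_write_cache_pages(); epd and fs_info are that function's locals),
+the fix gates the final flush of queued btree bios on the filesystem
+error state:
+
+	if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+		ret = flush_write_bio(&epd);	/* healthy fs: submit as usual */
+	} else {
+		ret = -EUCLEAN;			/* errored fs: abort the bios */
+		end_write_bio(&epd, ret);
+	}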
+ +Link: https://github.com/bobfuzzer/CVE/tree/master/CVE-2019-19377 +CC: stable@vger.kernel.org # 5.4+ +Reviewed-by: Josef Bacik +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/extent_io.c | 35 ++++++++++++++++++++++++++++++++++- + 1 file changed, 34 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -3905,6 +3905,7 @@ int btree_write_cache_pages(struct addre + .extent_locked = 0, + .sync_io = wbc->sync_mode == WB_SYNC_ALL, + }; ++ struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info; + int ret = 0; + int done = 0; + int nr_to_write_done = 0; +@@ -4018,7 +4019,39 @@ retry: + end_write_bio(&epd, ret); + return ret; + } +- ret = flush_write_bio(&epd); ++ /* ++ * If something went wrong, don't allow any metadata write bio to be ++ * submitted. ++ * ++ * This would prevent use-after-free if we had dirty pages not ++ * cleaned up, which can still happen with fuzzed images. ++ * ++ * - Bad extent tree ++ * Allowing existing tree block to be allocated for other trees. ++ * ++ * - Log tree operations ++ * Existing tree blocks get allocated to log tree, bumps its ++ * generation, then get cleaned in tree re-balance. ++ * Such tree block will not be written back, since it's clean, ++ * thus no WRITTEN flag set. ++ * And after log writes back, this tree block is not traced by ++ * any dirty extent_io_tree. ++ * ++ * - Offending tree block gets re-dirtied from its original owner ++ * Since it has bumped generation, no WRITTEN flag, it can be ++ * reused without COWing. This tree block will not be traced ++ * by btrfs_transaction::dirty_pages. ++ * ++ * Now such dirty tree block will not be cleaned by any dirty ++ * extent io tree. Thus we don't want to submit such wild eb ++ * if the fs already has errors. ++ */ ++ if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { ++ ret = flush_write_bio(&epd); ++ } else { ++ ret = -EUCLEAN; ++ end_write_bio(&epd, ret); ++ } + return ret; + } + diff --git a/queue-5.6/btrfs-drop-block-from-cache-on-error-in-relocation.patch b/queue-5.6/btrfs-drop-block-from-cache-on-error-in-relocation.patch new file mode 100644 index 00000000000..b47a0d7a02c --- /dev/null +++ b/queue-5.6/btrfs-drop-block-from-cache-on-error-in-relocation.patch @@ -0,0 +1,41 @@ +From 8e19c9732ad1d127b5575a10f4fbcacf740500ff Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Wed, 4 Mar 2020 11:18:23 -0500 +Subject: btrfs: drop block from cache on error in relocation + +From: Josef Bacik + +commit 8e19c9732ad1d127b5575a10f4fbcacf740500ff upstream. + +If we have an error while building the backref tree in relocation we'll +process all the pending edges and then free the node. However if we +integrated some edges into the cache we'll lose our link to those edges +by simply freeing this node, which means we'll leak memory and +references to any roots that we've found. + +Instead we need to use remove_backref_node(), which walks through all of +the edges that are still linked to this node and frees them up and +drops any root references we may be holding.
+ +CC: stable@vger.kernel.org # 4.9+ +Reviewed-by: Qu Wenruo +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/relocation.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/relocation.c ++++ b/fs/btrfs/relocation.c +@@ -1186,7 +1186,7 @@ out: + free_backref_node(cache, lower); + } + +- free_backref_node(cache, node); ++ remove_backref_node(cache, node); + return ERR_PTR(err); + } + ASSERT(!node || !node->detached); diff --git a/queue-5.6/btrfs-fix-btrfs_calc_reclaim_metadata_size-calculation.patch b/queue-5.6/btrfs-fix-btrfs_calc_reclaim_metadata_size-calculation.patch new file mode 100644 index 00000000000..26b8ec289e4 --- /dev/null +++ b/queue-5.6/btrfs-fix-btrfs_calc_reclaim_metadata_size-calculation.patch @@ -0,0 +1,122 @@ +From fa121a26b2ceabce613e0b4cfc7498cfde73fe8d Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Fri, 21 Feb 2020 16:41:10 -0500 +Subject: btrfs: fix btrfs_calc_reclaim_metadata_size calculation + +From: Josef Bacik + +commit fa121a26b2ceabce613e0b4cfc7498cfde73fe8d upstream. + +I noticed while running my snapshot torture test that we were getting a +lot of metadata chunks allocated with very little actually used. +Digging into this we would commit the transaction, still not have enough +space, and then force a chunk allocation. + +I noticed that we were barely flushing any delalloc at all, despite the +fact that we had around 13gib of outstanding delalloc reservations. It +turns out this is because of our btrfs_calc_reclaim_metadata_size() +calculation. It _only_ takes into account the outstanding ticket sizes, +which isn't the whole story. In this particular workload we're slowly +filling up the disk, which means our overcommit space will suddenly +become a lot less, and our outstanding reservations will be well more +than what we can handle. However we are only flushing based on our +ticket size, which is much less than we need to actually reclaim. + +So fix btrfs_calc_reclaim_metadata_size() to take into account the +overage in the case that we've gotten less available space suddenly. +This makes it so we attempt to reclaim a lot more delalloc space, which +allows us to make our reservations and we no longer are allocating a +bunch of needless metadata chunks. + +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Josef Bacik +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/space-info.c | 43 ++++++++++++++++++++++++++++++++++--------- + 1 file changed, 34 insertions(+), 9 deletions(-) + +--- a/fs/btrfs/space-info.c ++++ b/fs/btrfs/space-info.c +@@ -159,25 +159,19 @@ static inline u64 calc_global_rsv_need_s + return (global->size << 1); + } + +-int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, +- struct btrfs_space_info *space_info, u64 bytes, +- enum btrfs_reserve_flush_enum flush) ++static u64 calc_available_free_space(struct btrfs_fs_info *fs_info, ++ struct btrfs_space_info *space_info, ++ enum btrfs_reserve_flush_enum flush) + { + u64 profile; + u64 avail; +- u64 used; + int factor; + +- /* Don't overcommit when in mixed mode. 
*/ +- if (space_info->flags & BTRFS_BLOCK_GROUP_DATA) +- return 0; +- + if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM) + profile = btrfs_system_alloc_profile(fs_info); + else + profile = btrfs_metadata_alloc_profile(fs_info); + +- used = btrfs_space_info_used(space_info, true); + avail = atomic64_read(&fs_info->free_chunk_space); + + /* +@@ -198,6 +192,22 @@ int btrfs_can_overcommit(struct btrfs_fs + avail >>= 3; + else + avail >>= 1; ++ return avail; ++} ++ ++int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, ++ struct btrfs_space_info *space_info, u64 bytes, ++ enum btrfs_reserve_flush_enum flush) ++{ ++ u64 avail; ++ u64 used; ++ ++ /* Don't overcommit when in mixed mode */ ++ if (space_info->flags & BTRFS_BLOCK_GROUP_DATA) ++ return 0; ++ ++ used = btrfs_space_info_used(space_info, true); ++ avail = calc_available_free_space(fs_info, space_info, flush); + + if (used + bytes < space_info->total_bytes + avail) + return 1; +@@ -629,6 +639,7 @@ btrfs_calc_reclaim_metadata_size(struct + { + struct reserve_ticket *ticket; + u64 used; ++ u64 avail; + u64 expected; + u64 to_reclaim = 0; + +@@ -636,6 +647,20 @@ btrfs_calc_reclaim_metadata_size(struct + to_reclaim += ticket->bytes; + list_for_each_entry(ticket, &space_info->priority_tickets, list) + to_reclaim += ticket->bytes; ++ ++ avail = calc_available_free_space(fs_info, space_info, ++ BTRFS_RESERVE_FLUSH_ALL); ++ used = btrfs_space_info_used(space_info, true); ++ ++ /* ++ * We may be flushing because suddenly we have less space than we had ++ * before, and now we're well over-committed based on our current free ++ * space. If that's the case add in our overage so we make sure to put ++ * appropriate pressure on the flushing state machine. ++ */ ++ if (space_info->total_bytes + avail < used) ++ to_reclaim += used - (space_info->total_bytes + avail); ++ + if (to_reclaim) + return to_reclaim; + diff --git a/queue-5.6/btrfs-fix-crash-during-unmount-due-to-race-with-delayed-inode-workers.patch b/queue-5.6/btrfs-fix-crash-during-unmount-due-to-race-with-delayed-inode-workers.patch new file mode 100644 index 00000000000..67e0c04304e --- /dev/null +++ b/queue-5.6/btrfs-fix-crash-during-unmount-due-to-race-with-delayed-inode-workers.patch @@ -0,0 +1,222 @@ +From f0cc2cd70164efe8f75c5d99560f0f69969c72e4 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Fri, 28 Feb 2020 13:04:36 +0000 +Subject: Btrfs: fix crash during unmount due to race with delayed inode workers + +From: Filipe Manana + +commit f0cc2cd70164efe8f75c5d99560f0f69969c72e4 upstream. + +During unmount we can have a job from the delayed inode items work queue +still running, that can lead to at least two bad things: + +1) A crash, because the worker can try to create a transaction just + after the fs roots were freed; + +2) A transaction leak, because the worker can create a transaction + before the fs roots are freed and just after we committed the last + transaction and after we stopped the transaction kthread. + +A stack trace example of the crash: + + [79011.691214] kernel BUG at lib/radix-tree.c:982! + [79011.692056] invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI + [79011.693180] CPU: 3 PID: 1394 Comm: kworker/u8:2 Tainted: G W 5.6.0-rc2-btrfs-next-54 #2 + (...) + [79011.696789] Workqueue: btrfs-delayed-meta btrfs_work_helper [btrfs] + [79011.697904] RIP: 0010:radix_tree_tag_set+0xe7/0x170 + (...) 
+ [79011.702014] RSP: 0018:ffffb3c84a317ca0 EFLAGS: 00010293 + [79011.702949] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 + [79011.704202] RDX: ffffb3c84a317cb0 RSI: ffffb3c84a317ca8 RDI: ffff8db3931340a0 + [79011.705463] RBP: 0000000000000005 R08: 0000000000000005 R09: ffffffff974629d0 + [79011.706756] R10: ffffb3c84a317bc0 R11: 0000000000000001 R12: ffff8db393134000 + [79011.708010] R13: ffff8db3931340a0 R14: ffff8db393134068 R15: 0000000000000001 + [79011.709270] FS: 0000000000000000(0000) GS:ffff8db3b6a00000(0000) knlGS:0000000000000000 + [79011.710699] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + [79011.711710] CR2: 00007f22c2a0a000 CR3: 0000000232ad4005 CR4: 00000000003606e0 + [79011.712958] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + [79011.714205] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + [79011.715448] Call Trace: + [79011.715925] record_root_in_trans+0x72/0xf0 [btrfs] + [79011.716819] btrfs_record_root_in_trans+0x4b/0x70 [btrfs] + [79011.717925] start_transaction+0xdd/0x5c0 [btrfs] + [79011.718829] btrfs_async_run_delayed_root+0x17e/0x2b0 [btrfs] + [79011.719915] btrfs_work_helper+0xaa/0x720 [btrfs] + [79011.720773] process_one_work+0x26d/0x6a0 + [79011.721497] worker_thread+0x4f/0x3e0 + [79011.722153] ? process_one_work+0x6a0/0x6a0 + [79011.722901] kthread+0x103/0x140 + [79011.723481] ? kthread_create_worker_on_cpu+0x70/0x70 + [79011.724379] ret_from_fork+0x3a/0x50 + (...) + +The following diagram shows a sequence of steps that lead to the crash +during unmount of the filesystem: + + CPU 1 CPU 2 CPU 3 + + btrfs_punch_hole() + btrfs_btree_balance_dirty() + btrfs_balance_delayed_items() + --> sees + fs_info->delayed_root->items + with value 200, which is greater + than + BTRFS_DELAYED_BACKGROUND (128) + and smaller than + BTRFS_DELAYED_WRITEBACK (512) + btrfs_wq_run_delayed_node() + --> queues a job for + fs_info->delayed_workers to run + btrfs_async_run_delayed_root() + + btrfs_async_run_delayed_root() + --> job queued by CPU 1 + + --> starts picking and running + delayed nodes from the + prepare_list list + + close_ctree() + + btrfs_delete_unused_bgs() + + btrfs_commit_super() + + btrfs_join_transaction() + --> gets transaction N + + btrfs_commit_transaction(N) + --> set transaction state + to TRANS_STATE_COMMIT_START + + btrfs_first_prepared_delayed_node() + --> picks delayed node X through + the prepared_list list + + btrfs_run_delayed_items() + + btrfs_first_delayed_node() + --> also picks delayed node X + but through the node_list + list + + __btrfs_commit_inode_delayed_items() + --> runs all delayed items from + this node and drops the + node's item count to 0 + through call to + btrfs_release_delayed_inode() + + --> finishes running any remaining + delayed nodes + + --> finishes transaction commit + + --> stops cleaner and transaction threads + + btrfs_free_fs_roots() + --> frees all roots and removes them + from the radix tree + fs_info->fs_roots_radix + + btrfs_join_transaction() + start_transaction() + btrfs_record_root_in_trans() + record_root_in_trans() + radix_tree_tag_set() + --> crashes because + the root is not in + the radix tree + anymore + +If the worker is able to call btrfs_join_transaction() before the unmount +task frees the fs roots, we end up leaking a transaction and all its +resources, since after the call to btrfs_commit_super() and stopping the +transaction kthread, we don't expect to have any transaction open anymore.
+ +When this situation happens the worker has a delayed node that has no +more items to run, since the task calling btrfs_run_delayed_items(), +which is doing a transaction commit, picks the same node and runs all +its items first. + +We cannot wait for the worker to complete when running delayed items +through btrfs_run_delayed_items(), because we call that function in +several phases of a transaction commit, and that could cause a deadlock +because the worker calls btrfs_join_transaction() and the task doing the +transaction commit may have already set the transaction state to +TRANS_STATE_COMMIT_DOING. + +Also it's not possible to get into a situation where only some of the +items of a delayed node are added to the fs/subvolume tree in the current +transaction and the remaining ones in the next transaction, because when +running the items of a delayed inode we lock its mutex, effectively +waiting for the worker if the worker is running the items of the delayed +node already. + +Since this can only cause issues when unmounting a filesystem, fix it in +a simple way by waiting for any jobs on the delayed workers queue before +calling btrfs_commit_super() at close_ctree(). This works because at this +point no one can call btrfs_btree_balance_dirty() or +btrfs_balance_delayed_items(), and if we end up waiting for any worker to +complete, btrfs_commit_super() will commit the transaction created by the +worker. + +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/async-thread.c | 8 ++++++++ + fs/btrfs/async-thread.h | 1 + + fs/btrfs/disk-io.c | 13 +++++++++++++ + 3 files changed, 22 insertions(+) + +--- a/fs/btrfs/async-thread.c ++++ b/fs/btrfs/async-thread.c +@@ -395,3 +395,11 @@ void btrfs_set_work_high_priority(struct + { + set_bit(WORK_HIGH_PRIO_BIT, &work->flags); + } ++ ++void btrfs_flush_workqueue(struct btrfs_workqueue *wq) ++{ ++ if (wq->high) ++ flush_workqueue(wq->high->normal_wq); ++ ++ flush_workqueue(wq->normal->normal_wq); ++} +--- a/fs/btrfs/async-thread.h ++++ b/fs/btrfs/async-thread.h +@@ -44,5 +44,6 @@ void btrfs_set_work_high_priority(struct + struct btrfs_fs_info * __pure btrfs_work_owner(const struct btrfs_work *work); + struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct __btrfs_workqueue *wq); + bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq); ++void btrfs_flush_workqueue(struct btrfs_workqueue *wq); + + #endif +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -3990,6 +3990,19 @@ void __cold close_ctree(struct btrfs_fs_ + */ + btrfs_delete_unused_bgs(fs_info); + ++ /* ++ * There might be existing delayed inode workers still running ++ * and holding an empty delayed inode item. We must wait for ++ * them to complete first because they can create a transaction. ++ * This happens when someone calls btrfs_balance_delayed_items() ++ * and then a transaction commit runs the same delayed nodes ++ * before any delayed worker has done something with the nodes. ++ * We must wait for any worker here and not at transaction ++ * commit time since that could cause a deadlock. ++ * This is a very rare case.
++ */ ++ btrfs_flush_workqueue(fs_info->delayed_workers); ++ + ret = btrfs_commit_super(fs_info); + if (ret) + btrfs_err(fs_info, "commit super ret %d", ret); diff --git a/queue-5.6/btrfs-fix-missing-file-extent-item-for-hole-after-ranged-fsync.patch b/queue-5.6/btrfs-fix-missing-file-extent-item-for-hole-after-ranged-fsync.patch new file mode 100644 index 00000000000..86dadcfd3d3 --- /dev/null +++ b/queue-5.6/btrfs-fix-missing-file-extent-item-for-hole-after-ranged-fsync.patch @@ -0,0 +1,103 @@ +From 95418ed1d10774cd9a49af6f39e216c1256f1eeb Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 9 Mar 2020 12:41:05 +0000 +Subject: btrfs: fix missing file extent item for hole after ranged fsync + +From: Filipe Manana + +commit 95418ed1d10774cd9a49af6f39e216c1256f1eeb upstream. + +When doing a fast fsync for a range that starts at an offset greater than +zero, we can end up with a log that when replayed causes the respective +inode to miss a file extent item representing a hole if we are not using the +NO_HOLES feature. This is because for fast fsyncs we don't log any extents +that cover a range different from the one requested in the fsync. + +Example scenario to trigger it: + + $ mkfs.btrfs -O ^no-holes -f /dev/sdd + $ mount /dev/sdd /mnt + + # Create a file with a single 256K extent and fsync it to clear the full sync + # bit in the inode - we want the msync below to trigger a fast fsync. + $ xfs_io -f -c "pwrite -S 0xab 0 256K" -c "fsync" /mnt/foo + + # Force a transaction commit and wipe out the log tree. + $ sync + + # Dirty 768K of data, increasing the file size to 1Mb, and flush only + # the range from 256K to 512K without updating the log tree + # (sync_file_range() does not trigger fsync, it only starts writeback + # and waits for it to finish). + + $ xfs_io -c "pwrite -S 0xcd 256K 768K" /mnt/foo + $ xfs_io -c "sync_range -abw 256K 256K" /mnt/foo + + # Now dirty the range from 768K to 1M again and sync that range. + $ xfs_io -c "mmap -w 768K 256K" \ + -c "mwrite -S 0xef 768K 256K" \ + -c "msync -s 768K 256K" \ + -c "munmap" \ + /mnt/foo + + <power fail> + + # Mount to replay the log. + $ mount /dev/sdd /mnt + $ umount /mnt + + $ btrfs check /dev/sdd + Opening filesystem to check... + Checking filesystem on /dev/sdd + UUID: 482fb574-b288-478e-a190-a9c44a78fca6 + [1/7] checking root items + [2/7] checking extents + [3/7] checking free space cache + [4/7] checking fs roots + root 5 inode 257 errors 100, file extent discount + Found file extent holes: + start: 262144, len: 524288 + ERROR: errors found in fs roots + found 720896 bytes used, error(s) found + total csum bytes: 512 + total tree bytes: 131072 + total fs tree bytes: 32768 + total extent tree bytes: 16384 + btree space waste bytes: 123514 + file data blocks allocated: 589824 + referenced 589824 + +Fix this issue by setting the range to full (0 to LLONG_MAX) when the +NO_HOLES feature is not enabled. This results in extra work being done +but it gives the guarantee we don't end up with missing holes after +replaying the log. + +CC: stable@vger.kernel.org # 4.19+ +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/file.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -2071,6 +2071,16 @@ int btrfs_sync_file(struct file *file, l + btrfs_init_log_ctx(&ctx, inode); + + /* ++ * Set the range to full if the NO_HOLES feature is not enabled.
++ * This is to avoid missing file extent items representing holes after ++ * replaying the log. ++ */ ++ if (!btrfs_fs_incompat(fs_info, NO_HOLES)) { ++ start = 0; ++ end = LLONG_MAX; ++ } ++ ++ /* + * We write the dirty pages in the range and wait until they complete + * out of the ->i_mutex. If so, we can flush the dirty pages by + * multi-task, and make the performance up. See diff --git a/queue-5.6/btrfs-fix-missing-semaphore-unlock-in-btrfs_sync_file.patch b/queue-5.6/btrfs-fix-missing-semaphore-unlock-in-btrfs_sync_file.patch new file mode 100644 index 00000000000..f4d3e8fd94a --- /dev/null +++ b/queue-5.6/btrfs-fix-missing-semaphore-unlock-in-btrfs_sync_file.patch @@ -0,0 +1,35 @@ +From 6ff06729c22ec0b7498d900d79cc88cfb8aceaeb Mon Sep 17 00:00:00 2001 +From: Robbie Ko +Date: Tue, 17 Mar 2020 14:31:02 +0800 +Subject: btrfs: fix missing semaphore unlock in btrfs_sync_file + +From: Robbie Ko + +commit 6ff06729c22ec0b7498d900d79cc88cfb8aceaeb upstream. + +Ordered ops are started twice in sync file, once outside of inode mutex +and once inside, taking the dio semaphore. There was one error path +missing the semaphore unlock. + +Fixes: aab15e8ec2576 ("Btrfs: fix rare chances for data loss when doing a fast fsync") +CC: stable@vger.kernel.org # 4.19+ +Signed-off-by: Robbie Ko +Reviewed-by: Filipe Manana +[ add changelog ] +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/file.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -2134,6 +2134,7 @@ int btrfs_sync_file(struct file *file, l + */ + ret = start_ordered_ops(inode, start, end); + if (ret) { ++ up_write(&BTRFS_I(inode)->dio_sem); + inode_unlock(inode); + goto out; + } diff --git a/queue-5.6/btrfs-reloc-clean-dirty-subvols-if-we-fail-to-start-a-transaction.patch b/queue-5.6/btrfs-reloc-clean-dirty-subvols-if-we-fail-to-start-a-transaction.patch new file mode 100644 index 00000000000..2a9398a0d46 --- /dev/null +++ b/queue-5.6/btrfs-reloc-clean-dirty-subvols-if-we-fail-to-start-a-transaction.patch @@ -0,0 +1,51 @@ +From 6217b0fadd4473a16fabc6aecd7527a9f71af534 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Wed, 4 Mar 2020 11:18:27 -0500 +Subject: btrfs: reloc: clean dirty subvols if we fail to start a transaction + +From: Josef Bacik + +commit 6217b0fadd4473a16fabc6aecd7527a9f71af534 upstream. + +If we do merge_reloc_roots() we could insert a few roots onto the dirty +subvol roots list, where we hold a ref on them. If we fail to start the +transaction we need to run clean_dirty_subvols() in order to cleanup the +refs. 
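+
+In outline, the change below redirects the transaction-start error path
+so it falls through to clean_dirty_subvols(); condensed from the
+btrfs_recover_relocation() hunk that follows:
+
+	trans = btrfs_join_transaction(rc->extent_root);
+	if (IS_ERR(trans)) {
+		err = PTR_ERR(trans);
+		goto out_clean;		/* was: goto out_free, skipping the cleanup */
+	}
+	err = btrfs_commit_transaction(trans);
+out_clean:
+	ret = clean_dirty_subvols(rc);
+	if (ret < 0 && !err)
+		err = ret;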
+ +CC: stable@vger.kernel.org # 5.4+ +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/relocation.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/fs/btrfs/relocation.c ++++ b/fs/btrfs/relocation.c +@@ -4221,10 +4221,10 @@ restart: + goto out_free; + } + btrfs_commit_transaction(trans); ++out_free: + ret = clean_dirty_subvols(rc); + if (ret < 0 && !err) + err = ret; +-out_free: + btrfs_free_block_rsv(fs_info, rc->block_rsv); + btrfs_free_path(path); + return err; +@@ -4634,10 +4634,10 @@ int btrfs_recover_relocation(struct btrf + trans = btrfs_join_transaction(rc->extent_root); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); +- goto out_free; ++ goto out_clean; + } + err = btrfs_commit_transaction(trans); +- ++out_clean: + ret = clean_dirty_subvols(rc); + if (ret < 0 && !err) + err = ret; diff --git a/queue-5.6/btrfs-set-update-the-uuid-generation-as-soon-as-possible.patch b/queue-5.6/btrfs-set-update-the-uuid-generation-as-soon-as-possible.patch new file mode 100644 index 00000000000..5145ca0c5c7 --- /dev/null +++ b/queue-5.6/btrfs-set-update-the-uuid-generation-as-soon-as-possible.patch @@ -0,0 +1,64 @@ +From 75ec1db8717a8f0a9d9c8d033e542fdaa7b73898 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Fri, 14 Feb 2020 15:22:06 -0500 +Subject: btrfs: set update the uuid generation as soon as possible + +From: Josef Bacik + +commit 75ec1db8717a8f0a9d9c8d033e542fdaa7b73898 upstream. + +In my EIO stress testing I noticed I was getting forced to rescan the +uuid tree pretty often, which was weird. This is because my error +injection stuff would sometimes inject an error after log replay but +before we loaded the UUID tree. If log replay committed the transaction +it wouldn't have updated the uuid tree generation, but the tree was +valid and didn't change, so there's no reason to not update the +generation here. + +Fix this by setting the BTRFS_FS_UPDATE_UUID_TREE_GEN bit immediately +after reading all the fs roots if the uuid tree generation matches the +fs generation. Then any transaction commits that happen during mount +won't screw up our uuid tree state, forcing us to do needless uuid +rescans. + +Fixes: 70f801754728 ("Btrfs: check UUID tree during mount if required") +CC: stable@vger.kernel.org # 4.19+ +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/disk-io.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -3061,6 +3061,18 @@ int __cold open_ctree(struct super_block + if (ret) + goto fail_tree_roots; + ++ /* ++ * If we have a uuid root and we're not being told to rescan we need to ++ * check the generation here so we can set the ++ * BTRFS_FS_UPDATE_UUID_TREE_GEN bit. Otherwise we could commit the ++ * transaction during a balance or the log replay without updating the ++ * uuid generation, and then if we crash we would rescan the uuid tree, ++ * even though it was perfectly fine. 
++ */ ++ if (fs_info->uuid_root && !btrfs_test_opt(fs_info, RESCAN_UUID_TREE) && ++ fs_info->generation == btrfs_super_uuid_tree_generation(disk_super)) ++ set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags); ++ + ret = btrfs_verify_dev_extents(fs_info); + if (ret) { + btrfs_err(fs_info, +@@ -3285,8 +3297,6 @@ int __cold open_ctree(struct super_block + close_ctree(fs_info); + return ret; + } +- } else { +- set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags); + } + set_bit(BTRFS_FS_OPEN, &fs_info->flags); + diff --git a/queue-5.6/btrfs-unset-reloc-control-if-we-fail-to-recover.patch b/queue-5.6/btrfs-unset-reloc-control-if-we-fail-to-recover.patch new file mode 100644 index 00000000000..8d8856b6962 --- /dev/null +++ b/queue-5.6/btrfs-unset-reloc-control-if-we-fail-to-recover.patch @@ -0,0 +1,70 @@ +From fb2d83eefef4e1c717205bac71cb1941edf8ae11 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Wed, 4 Mar 2020 11:18:25 -0500 +Subject: btrfs: unset reloc control if we fail to recover + +From: Josef Bacik + +commit fb2d83eefef4e1c717205bac71cb1941edf8ae11 upstream. + +If we fail to load an fs root, or fail to start a transaction we can +bail without unsetting the reloc control, which leads to problems later +when we free the reloc control but still have it attached to the file +system. + +In the normal path we'll end up calling unset_reloc_control() twice, but +all it does is set fs_info->reloc_control = NULL, and we can only have +one balance at a time so it's not racey. + +CC: stable@vger.kernel.org # 5.4+ +Reviewed-by: Qu Wenruo +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/relocation.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/fs/btrfs/relocation.c ++++ b/fs/btrfs/relocation.c +@@ -4593,9 +4593,8 @@ int btrfs_recover_relocation(struct btrf + + trans = btrfs_join_transaction(rc->extent_root); + if (IS_ERR(trans)) { +- unset_reloc_control(rc); + err = PTR_ERR(trans); +- goto out_free; ++ goto out_unset; + } + + rc->merge_reloc_tree = 1; +@@ -4615,7 +4614,7 @@ int btrfs_recover_relocation(struct btrf + if (IS_ERR(fs_root)) { + err = PTR_ERR(fs_root); + list_add_tail(&reloc_root->root_list, &reloc_roots); +- goto out_free; ++ goto out_unset; + } + + err = __add_reloc_root(reloc_root); +@@ -4625,7 +4624,7 @@ int btrfs_recover_relocation(struct btrf + + err = btrfs_commit_transaction(trans); + if (err) +- goto out_free; ++ goto out_unset; + + merge_reloc_roots(rc); + +@@ -4641,7 +4640,8 @@ out_clean: + ret = clean_dirty_subvols(rc); + if (ret < 0 && !err) + err = ret; +-out_free: ++out_unset: ++ unset_reloc_control(rc); + kfree(rc); + out: + if (!list_empty(&reloc_roots)) diff --git a/queue-5.6/btrfs-use-nofs-allocations-for-running-delayed-items.patch b/queue-5.6/btrfs-use-nofs-allocations-for-running-delayed-items.patch new file mode 100644 index 00000000000..d2d6f96bff2 --- /dev/null +++ b/queue-5.6/btrfs-use-nofs-allocations-for-running-delayed-items.patch @@ -0,0 +1,232 @@ +From 351cbf6e4410e7ece05e35d0a07320538f2418b4 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Thu, 19 Mar 2020 10:11:32 -0400 +Subject: btrfs: use nofs allocations for running delayed items + +From: Josef Bacik + +commit 351cbf6e4410e7ece05e35d0a07320538f2418b4 upstream. 
+ +Zygo reported the following lockdep splat while testing the balance +patches + +====================================================== +WARNING: possible circular locking dependency detected +5.6.0-c6f0579d496a+ #53 Not tainted +------------------------------------------------------ +kswapd0/1133 is trying to acquire lock: +ffff888092f622c0 (&delayed_node->mutex){+.+.}, at: __btrfs_release_delayed_node+0x7c/0x5b0 + +but task is already holding lock: +ffffffff8fc5f860 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x5/0x30 + +which lock already depends on the new lock. + +the existing dependency chain (in reverse order) is: + +-> #1 (fs_reclaim){+.+.}: + fs_reclaim_acquire.part.91+0x29/0x30 + fs_reclaim_acquire+0x19/0x20 + kmem_cache_alloc_trace+0x32/0x740 + add_block_entry+0x45/0x260 + btrfs_ref_tree_mod+0x6e2/0x8b0 + btrfs_alloc_tree_block+0x789/0x880 + alloc_tree_block_no_bg_flush+0xc6/0xf0 + __btrfs_cow_block+0x270/0x940 + btrfs_cow_block+0x1ba/0x3a0 + btrfs_search_slot+0x999/0x1030 + btrfs_insert_empty_items+0x81/0xe0 + btrfs_insert_delayed_items+0x128/0x7d0 + __btrfs_run_delayed_items+0xf4/0x2a0 + btrfs_run_delayed_items+0x13/0x20 + btrfs_commit_transaction+0x5cc/0x1390 + insert_balance_item.isra.39+0x6b2/0x6e0 + btrfs_balance+0x72d/0x18d0 + btrfs_ioctl_balance+0x3de/0x4c0 + btrfs_ioctl+0x30ab/0x44a0 + ksys_ioctl+0xa1/0xe0 + __x64_sys_ioctl+0x43/0x50 + do_syscall_64+0x77/0x2c0 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +-> #0 (&delayed_node->mutex){+.+.}: + __lock_acquire+0x197e/0x2550 + lock_acquire+0x103/0x220 + __mutex_lock+0x13d/0xce0 + mutex_lock_nested+0x1b/0x20 + __btrfs_release_delayed_node+0x7c/0x5b0 + btrfs_remove_delayed_node+0x49/0x50 + btrfs_evict_inode+0x6fc/0x900 + evict+0x19a/0x2c0 + dispose_list+0xa0/0xe0 + prune_icache_sb+0xbd/0xf0 + super_cache_scan+0x1b5/0x250 + do_shrink_slab+0x1f6/0x530 + shrink_slab+0x32e/0x410 + shrink_node+0x2a5/0xba0 + balance_pgdat+0x4bd/0x8a0 + kswapd+0x35a/0x800 + kthread+0x1e9/0x210 + ret_from_fork+0x3a/0x50 + +other info that might help us debug this: + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(fs_reclaim); + lock(&delayed_node->mutex); + lock(fs_reclaim); + lock(&delayed_node->mutex); + + *** DEADLOCK *** + +3 locks held by kswapd0/1133: + #0: ffffffff8fc5f860 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x5/0x30 + #1: ffffffff8fc380d8 (shrinker_rwsem){++++}, at: shrink_slab+0x1e8/0x410 + #2: ffff8881e0e6c0e8 (&type->s_umount_key#42){++++}, at: trylock_super+0x1b/0x70 + +stack backtrace: +CPU: 2 PID: 1133 Comm: kswapd0 Not tainted 5.6.0-c6f0579d496a+ #53 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 +Call Trace: + dump_stack+0xc1/0x11a + print_circular_bug.isra.38.cold.57+0x145/0x14a + check_noncircular+0x2a9/0x2f0 + ? print_circular_bug.isra.38+0x130/0x130 + ? stack_trace_consume_entry+0x90/0x90 + ? save_trace+0x3cc/0x420 + __lock_acquire+0x197e/0x2550 + ? btrfs_inode_clear_file_extent_range+0x9b/0xb0 + ? register_lock_class+0x960/0x960 + lock_acquire+0x103/0x220 + ? __btrfs_release_delayed_node+0x7c/0x5b0 + __mutex_lock+0x13d/0xce0 + ? __btrfs_release_delayed_node+0x7c/0x5b0 + ? __asan_loadN+0xf/0x20 + ? pvclock_clocksource_read+0xeb/0x190 + ? __btrfs_release_delayed_node+0x7c/0x5b0 + ? mutex_lock_io_nested+0xc20/0xc20 + ? __kasan_check_read+0x11/0x20 + ? check_chain_key+0x1e6/0x2e0 + mutex_lock_nested+0x1b/0x20 + ? mutex_lock_nested+0x1b/0x20 + __btrfs_release_delayed_node+0x7c/0x5b0 + btrfs_remove_delayed_node+0x49/0x50 + btrfs_evict_inode+0x6fc/0x900 + ? 
btrfs_setattr+0x840/0x840 + ? do_raw_spin_unlock+0xa8/0x140 + evict+0x19a/0x2c0 + dispose_list+0xa0/0xe0 + prune_icache_sb+0xbd/0xf0 + ? invalidate_inodes+0x310/0x310 + super_cache_scan+0x1b5/0x250 + do_shrink_slab+0x1f6/0x530 + shrink_slab+0x32e/0x410 + ? do_shrink_slab+0x530/0x530 + ? do_shrink_slab+0x530/0x530 + ? __kasan_check_read+0x11/0x20 + ? mem_cgroup_protected+0x13d/0x260 + shrink_node+0x2a5/0xba0 + balance_pgdat+0x4bd/0x8a0 + ? mem_cgroup_shrink_node+0x490/0x490 + ? _raw_spin_unlock_irq+0x27/0x40 + ? finish_task_switch+0xce/0x390 + ? rcu_read_lock_bh_held+0xb0/0xb0 + kswapd+0x35a/0x800 + ? _raw_spin_unlock_irqrestore+0x4c/0x60 + ? balance_pgdat+0x8a0/0x8a0 + ? finish_wait+0x110/0x110 + ? __kasan_check_read+0x11/0x20 + ? __kthread_parkme+0xc6/0xe0 + ? balance_pgdat+0x8a0/0x8a0 + kthread+0x1e9/0x210 + ? kthread_create_worker_on_cpu+0xc0/0xc0 + ret_from_fork+0x3a/0x50 + +This is because we hold that delayed node's mutex while doing tree +operations. Fix this by just wrapping the searches in nofs. + +CC: stable@vger.kernel.org # 4.4+ +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/delayed-inode.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +--- a/fs/btrfs/delayed-inode.c ++++ b/fs/btrfs/delayed-inode.c +@@ -6,6 +6,7 @@ + + #include <linux/slab.h> + #include <linux/iversion.h> ++#include <linux/sched/mm.h> + #include "misc.h" + #include "delayed-inode.h" + #include "disk-io.h" +@@ -805,11 +806,14 @@ static int btrfs_insert_delayed_item(str + struct btrfs_delayed_item *delayed_item) + { + struct extent_buffer *leaf; ++ unsigned int nofs_flag; + char *ptr; + int ret; + ++ nofs_flag = memalloc_nofs_save(); + ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key, + delayed_item->data_len); ++ memalloc_nofs_restore(nofs_flag); + if (ret < 0 && ret != -EEXIST) + return ret; + +@@ -937,6 +941,7 @@ static int btrfs_delete_delayed_items(st + struct btrfs_delayed_node *node) + { + struct btrfs_delayed_item *curr, *prev; ++ unsigned int nofs_flag; + int ret = 0; + + do_again: +@@ -945,7 +950,9 @@ do_again: + if (!curr) + goto delete_fail; + ++ nofs_flag = memalloc_nofs_save(); + ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1); ++ memalloc_nofs_restore(nofs_flag); + if (ret < 0) + goto delete_fail; + else if (ret > 0) { +@@ -1012,6 +1019,7 @@ static int __btrfs_update_delayed_inode( + struct btrfs_key key; + struct btrfs_inode_item *inode_item; + struct extent_buffer *leaf; ++ unsigned int nofs_flag; + int mod; + int ret; + +@@ -1024,7 +1032,9 @@ static int __btrfs_update_delayed_inode( + else + mod = 1; + ++ nofs_flag = memalloc_nofs_save(); + ret = btrfs_lookup_inode(trans, root, path, &key, mod); ++ memalloc_nofs_restore(nofs_flag); + if (ret > 0) { + btrfs_release_path(path); + return -ENOENT; +@@ -1075,7 +1085,10 @@ search: + + key.type = BTRFS_INODE_EXTREF_KEY; + key.offset = -1; ++ ++ nofs_flag = memalloc_nofs_save(); + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); ++ memalloc_nofs_restore(nofs_flag); + if (ret < 0) + goto err_out; + ASSERT(ret); diff --git a/queue-5.6/cifs-check-new-file-size-when-extending-file-by-fallocate.patch b/queue-5.6/cifs-check-new-file-size-when-extending-file-by-fallocate.patch new file mode 100644 index 00000000000..7848e9823d6 --- /dev/null +++ b/queue-5.6/cifs-check-new-file-size-when-extending-file-by-fallocate.patch @@ -0,0 +1,40 @@ +From ef4a632ccc1c7d3fb71a5baae85b79af08b7f94b Mon Sep 17 00:00:00 2001 +From: Murphy Zhou +Date: Wed, 18 Mar 2020 20:43:38 +0800 +Subject:
CIFS: check new file size when extending file by fallocate + +From: Murphy Zhou + +commit ef4a632ccc1c7d3fb71a5baae85b79af08b7f94b upstream. + +xfstests generic/228 checks if fallocate respects RLIMIT_FSIZE. +After fallocate mode 0 extending was enabled, we can hit this failure. +Fix this by checking the new file size with the vfs helper, and return an +error if the file size is larger than RLIMIT_FSIZE (ulimit -f). + +This patch has been tested by LTP/xfstests against Samba and +Windows server. + +Acked-by: Ronnie Sahlberg +Signed-off-by: Murphy Zhou +Signed-off-by: Steve French +CC: Stable +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/smb2ops.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/fs/cifs/smb2ops.c ++++ b/fs/cifs/smb2ops.c +@@ -3248,6 +3248,10 @@ static long smb3_simple_falloc(struct fi + * Extending the file + */ + if ((keep_size == false) && i_size_read(inode) < off + len) { ++ rc = inode_newsize_ok(inode, off + len); ++ if (rc) ++ goto out; ++ + if ((cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) == 0) + smb2_set_sparse(xid, tcon, cfile, inode, false); + diff --git a/queue-5.6/cifs-fix-bug-which-the-return-value-by-asynchronous-read-is-error.patch b/queue-5.6/cifs-fix-bug-which-the-return-value-by-asynchronous-read-is-error.patch new file mode 100644 index 00000000000..149e9691bb7 --- /dev/null +++ b/queue-5.6/cifs-fix-bug-which-the-return-value-by-asynchronous-read-is-error.patch @@ -0,0 +1,63 @@ +From 97adda8b3ab703de8e4c8d27646ddd54fe22879c Mon Sep 17 00:00:00 2001 +From: Yilu Lin +Date: Wed, 18 Mar 2020 11:59:19 +0800 +Subject: CIFS: Fix bug which the return value by asynchronous read is error + +From: Yilu Lin + +commit 97adda8b3ab703de8e4c8d27646ddd54fe22879c upstream. + +This patch fixes the bug in collect_uncached_read_data() where rc is +automatically converted from a signed number to an unsigned number when +the CIFS asynchronous read fails, which makes ctx->rc wrong. + +Example: +Share a directory and create a file on the Windows OS. +Mount the directory to the Linux OS using CIFS. +On the CIFS client of the Linux OS, invoke the pread interface to +deliver the read request. + +The size of the read length plus the offset of the read request is greater +than the maximum file size. + +In this case, the CIFS server on the Windows OS returns a failure +message (for example, the return value of +smb2.nt_status is STATUS_INVALID_PARAMETER). + +After receiving the response message, the CIFS client parses +smb2.nt_status to STATUS_INVALID_PARAMETER +and converts it to the Linux error code (rdata->result=-22). + +Then the CIFS client invokes the collect_uncached_read_data function to +assign the value of rdata->result to rc, that is, rc=rdata->result=-22. + +The type of the ctx->total_len variable is unsigned integer, +the type of the rc variable is integer, and the type of +the ctx->rc variable is ssize_t. + +Therefore, during the ternary operation, the value of rc is +automatically converted to an unsigned number. The final result is +ctx->rc=4294967274. However, the expected result is ctx->rc=-22. + +Signed-off-by: Yilu Lin +Signed-off-by: Steve French +CC: Stable +Acked-by: Ronnie Sahlberg +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/file.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/cifs/file.c ++++ b/fs/cifs/file.c +@@ -3841,7 +3841,7 @@ again: + if (rc == -ENODATA) + rc = 0; + +- ctx->rc = (rc == 0) ? ctx->total_len : rc; ++ ctx->rc = (rc == 0) ?
(ssize_t)ctx->total_len : rc; + + mutex_unlock(&ctx->aio_mutex); + diff --git a/queue-5.6/drm-i915-gen12-disable-preemption-timeout.patch b/queue-5.6/drm-i915-gen12-disable-preemption-timeout.patch new file mode 100644 index 00000000000..8457761f940 --- /dev/null +++ b/queue-5.6/drm-i915-gen12-disable-preemption-timeout.patch @@ -0,0 +1,96 @@ +From 07bcfd1291de77ffa9b627b4442783aba1335229 Mon Sep 17 00:00:00 2001 +From: Tvrtko Ursulin +Date: Thu, 12 Mar 2020 11:57:48 +0000 +Subject: drm/i915/gen12: Disable preemption timeout + +From: Tvrtko Ursulin + +commit 07bcfd1291de77ffa9b627b4442783aba1335229 upstream. + +Allow super long OpenCL workloads which cannot be preempted within +the default timeout to run out of the box. + +v2: + * Make it stick out more and apply only to RCS. (Chris) + +v3: + * Mention platform override in kconfig. (Joonas) + +Signed-off-by: Tvrtko Ursulin +Cc: Chris Wilson +Cc: Joonas Lahtinen +Cc: Michal Mrozek +Cc: # v5.6+ +Acked-by: Chris Wilson +Acked-by: Michal Mrozek +Link: https://patchwork.freedesktop.org/patch/msgid/20200312115748.29970-1-tvrtko.ursulin@linux.intel.com +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/i915/Kconfig.profile | 4 ++++ + drivers/gpu/drm/i915/gt/intel_engine_cs.c | 13 +++++++++---- + 2 files changed, 13 insertions(+), 4 deletions(-) + +--- a/drivers/gpu/drm/i915/Kconfig.profile ++++ b/drivers/gpu/drm/i915/Kconfig.profile +@@ -35,6 +35,10 @@ config DRM_I915_PREEMPT_TIMEOUT + + May be 0 to disable the timeout. + ++ The compiled in default may get overridden at driver probe time on ++ certain platforms and certain engines which will be reflected in the ++ sysfs control. ++ + config DRM_I915_SPIN_REQUEST + int "Busywait for request completion (us)" + default 5 # microseconds +--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c ++++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c +@@ -274,6 +274,7 @@ static void intel_engine_sanitize_mmio(s + static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) + { + const struct engine_info *info = &intel_engines[id]; ++ struct drm_i915_private *i915 = gt->i915; + struct intel_engine_cs *engine; + + BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); +@@ -300,11 +301,11 @@ static int intel_engine_setup(struct int + engine->id = id; + engine->legacy_idx = INVALID_ENGINE; + engine->mask = BIT(id); +- engine->i915 = gt->i915; ++ engine->i915 = i915; + engine->gt = gt; + engine->uncore = gt->uncore; + engine->hw_id = engine->guc_id = info->hw_id; +- engine->mmio_base = __engine_mmio_base(gt->i915, info->mmio_bases); ++ engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases); + + engine->class = info->class; + engine->instance = info->instance; +@@ -319,11 +320,15 @@ static int intel_engine_setup(struct int + engine->props.timeslice_duration_ms = + CONFIG_DRM_I915_TIMESLICE_DURATION; + ++ /* Override to uninterruptible for OpenCL workloads. 
*/ ++ if (INTEL_GEN(i915) == 12 && engine->class == RENDER_CLASS) ++ engine->props.preempt_timeout_ms = 0; ++ + engine->context_size = intel_engine_context_size(gt, engine->class); + if (WARN_ON(engine->context_size > BIT(20))) + engine->context_size = 0; + if (engine->context_size) +- DRIVER_CAPS(gt->i915)->has_logical_contexts = true; ++ DRIVER_CAPS(i915)->has_logical_contexts = true; + + /* Nothing to do here, execute in order of dependencies */ + engine->schedule = NULL; +@@ -339,7 +344,7 @@ static int intel_engine_setup(struct int + gt->engine_class[info->class][info->instance] = engine; + gt->engine[id] = engine; + +- gt->i915->engine[id] = engine; ++ i915->engine[id] = engine; + + return 0; + } diff --git a/queue-5.6/kvm-nvmx-properly-handle-userspace-interrupt-window-request.patch b/queue-5.6/kvm-nvmx-properly-handle-userspace-interrupt-window-request.patch new file mode 100644 index 00000000000..b0d76bd69fc --- /dev/null +++ b/queue-5.6/kvm-nvmx-properly-handle-userspace-interrupt-window-request.patch @@ -0,0 +1,163 @@ +From a1c77abb8d93381e25a8d2df3a917388244ba776 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Mon, 2 Mar 2020 22:27:35 -0800 +Subject: KVM: nVMX: Properly handle userspace interrupt window request + +From: Sean Christopherson + +commit a1c77abb8d93381e25a8d2df3a917388244ba776 upstream. + +Return true for vmx_interrupt_allowed() if the vCPU is in L2 and L1 has +external interrupt exiting enabled. IRQs are never blocked in hardware +if the CPU is in the guest (L2 from L1's perspective) when IRQs trigger +VM-Exit. + +The new check percolates up to kvm_vcpu_ready_for_interrupt_injection() +and thus vcpu_run(), and so KVM will exit to userspace if userspace has +requested an interrupt window (to inject an IRQ into L1). + +Remove the @external_intr param from vmx_check_nested_events(), which is +actually an indicator that userspace wants an interrupt window, e.g. +it's named @req_int_win further up the stack. Injecting a VM-Exit into +L1 to try and bounce out to L0 userspace is all kinds of broken and is +no longer necessary. + +Remove the hack in nested_vmx_vmexit() that attempted to workaround the +breakage in vmx_check_nested_events() by only filling interrupt info if +there's an actual interrupt pending. The hack actually made things +worse because it caused KVM to _never_ fill interrupt info when the +LAPIC resides in userspace (kvm_cpu_has_interrupt() queries +interrupt.injected, which is always cleared by prepare_vmcs12() before +reaching the hack in nested_vmx_vmexit()). 
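+
+With the change below, the interrupt-allowed check becomes (condensed
+from the vmx.c hunk that follows):
+
+	static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
+	{
+		if (to_vmx(vcpu)->nested.nested_run_pending)
+			return false;
+
+		/* An IRQ in L2 with external interrupt exiting always VM-Exits. */
+		if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
+			return true;
+
+		return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+			!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+			  (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
+	}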
+ +Fixes: 6550c4df7e50 ("KVM: nVMX: Fix interrupt window request with "Acknowledge interrupt on exit"") +Cc: stable@vger.kernel.org +Cc: Liran Alon +Signed-off-by: Sean Christopherson +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/kvm_host.h | 2 +- + arch/x86/kvm/vmx/nested.c | 18 ++++-------------- + arch/x86/kvm/vmx/vmx.c | 9 +++++++-- + arch/x86/kvm/x86.c | 10 +++++----- + 4 files changed, 17 insertions(+), 22 deletions(-) + +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -1180,7 +1180,7 @@ struct kvm_x86_ops { + bool (*pt_supported)(void); + bool (*pku_supported)(void); + +- int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); ++ int (*check_nested_events)(struct kvm_vcpu *vcpu); + void (*request_immediate_exit)(struct kvm_vcpu *vcpu); + + void (*sched_in)(struct kvm_vcpu *kvm, int cpu); +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -3604,7 +3604,7 @@ static void nested_vmx_update_pending_db + vcpu->arch.exception.payload); + } + +-static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) ++static int vmx_check_nested_events(struct kvm_vcpu *vcpu) + { + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long exit_qual; +@@ -3680,8 +3680,7 @@ static int vmx_check_nested_events(struc + return 0; + } + +- if ((kvm_cpu_has_interrupt(vcpu) || external_intr) && +- nested_exit_on_intr(vcpu)) { ++ if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(vcpu)) { + if (block_nested_events) + return -EBUSY; + nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); +@@ -4329,17 +4328,8 @@ void nested_vmx_vmexit(struct kvm_vcpu * + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + + if (likely(!vmx->fail)) { +- /* +- * TODO: SDM says that with acknowledge interrupt on +- * exit, bit 31 of the VM-exit interrupt information +- * (valid interrupt) is always set to 1 on +- * EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't +- * need kvm_cpu_has_interrupt(). See the commit +- * message for details. +- */ +- if (nested_exit_intr_ack_set(vcpu) && +- exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && +- kvm_cpu_has_interrupt(vcpu)) { ++ if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && ++ nested_exit_intr_ack_set(vcpu)) { + int irq = kvm_cpu_get_interrupt(vcpu); + WARN_ON(irq < 0); + vmcs12->vm_exit_intr_info = irq | +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -4507,8 +4507,13 @@ static int vmx_nmi_allowed(struct kvm_vc + + static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) + { +- return (!to_vmx(vcpu)->nested.nested_run_pending && +- vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && ++ if (to_vmx(vcpu)->nested.nested_run_pending) ++ return false; ++ ++ if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) ++ return true; ++ ++ return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && + !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); + } +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -7635,7 +7635,7 @@ static void update_cr8_intercept(struct + kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); + } + +-static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) ++static int inject_pending_event(struct kvm_vcpu *vcpu) + { + int r; + +@@ -7671,7 +7671,7 @@ static int inject_pending_event(struct k + * from L2 to L1. 
+ */ + if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { +- r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); ++ r = kvm_x86_ops->check_nested_events(vcpu); + if (r != 0) + return r; + } +@@ -7733,7 +7733,7 @@ static int inject_pending_event(struct k + * KVM_REQ_EVENT only on certain events and not unconditionally? + */ + if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { +- r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); ++ r = kvm_x86_ops->check_nested_events(vcpu); + if (r != 0) + return r; + } +@@ -8266,7 +8266,7 @@ static int vcpu_enter_guest(struct kvm_v + goto out; + } + +- if (inject_pending_event(vcpu, req_int_win) != 0) ++ if (inject_pending_event(vcpu) != 0) + req_immediate_exit = true; + else { + /* Enable SMI/NMI/IRQ window open exits if needed. +@@ -8496,7 +8496,7 @@ static inline int vcpu_block(struct kvm + static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu) + { + if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) +- kvm_x86_ops->check_nested_events(vcpu, false); ++ kvm_x86_ops->check_nested_events(vcpu); + + return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && + !vcpu->arch.apf.halted); diff --git a/queue-5.6/kvm-ppc-book3s-hv-skip-kvmppc_uvmem_free-if-ultravisor-is-not-supported.patch b/queue-5.6/kvm-ppc-book3s-hv-skip-kvmppc_uvmem_free-if-ultravisor-is-not-supported.patch new file mode 100644 index 00000000000..decfb066bcf --- /dev/null +++ b/queue-5.6/kvm-ppc-book3s-hv-skip-kvmppc_uvmem_free-if-ultravisor-is-not-supported.patch @@ -0,0 +1,71 @@ +From 9bee484b280a059c1faa10ae174af4f4af02c805 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 19 Mar 2020 19:55:10 -0300 +Subject: KVM: PPC: Book3S HV: Skip kvmppc_uvmem_free if Ultravisor is not supported + +From: Fabiano Rosas + +commit 9bee484b280a059c1faa10ae174af4f4af02c805 upstream. + +kvmppc_uvmem_init checks for Ultravisor support and returns early if +it is not present. 
Calling kvmppc_uvmem_free at module exit will cause +an Oops: + +$ modprobe -r kvm-hv + + Oops: Kernel access of bad area, sig: 11 [#1] + + NIP: c000000000789e90 LR: c000000000789e8c CTR: c000000000401030 + REGS: c000003fa7bab9a0 TRAP: 0300 Not tainted (5.6.0-rc6-00033-g6c90b86a745a-dirty) + MSR: 9000000000009033 CR: 24002282 XER: 00000000 + CFAR: c000000000dae880 DAR: 0000000000000008 DSISR: 40000000 IRQMASK: 1 + GPR00: c000000000789e8c c000003fa7babc30 c0000000016fe500 0000000000000000 + GPR04: 0000000000000000 0000000000000006 0000000000000000 c000003faf205c00 + GPR08: 0000000000000000 0000000000000001 000000008000002d c00800000ddde140 + GPR12: c000000000401030 c000003ffffd9080 0000000000000001 0000000000000000 + GPR16: 0000000000000000 0000000000000000 000000013aad0074 000000013aaac978 + GPR20: 000000013aad0070 0000000000000000 00007fffd1b37158 0000000000000000 + GPR24: 000000014fef0d58 0000000000000000 000000014fef0cf0 0000000000000001 + GPR28: 0000000000000000 0000000000000000 c0000000018b2a60 0000000000000000 + NIP [c000000000789e90] percpu_ref_kill_and_confirm+0x40/0x170 + LR [c000000000789e8c] percpu_ref_kill_and_confirm+0x3c/0x170 + Call Trace: + [c000003fa7babc30] [c000003faf2064d4] 0xc000003faf2064d4 (unreliable) + [c000003fa7babcb0] [c000000000400e8c] dev_pagemap_kill+0x6c/0x80 + [c000003fa7babcd0] [c000000000401064] memunmap_pages+0x34/0x2f0 + [c000003fa7babd50] [c00800000dddd548] kvmppc_uvmem_free+0x30/0x80 [kvm_hv] + [c000003fa7babd80] [c00800000ddcef18] kvmppc_book3s_exit_hv+0x20/0x78 [kvm_hv] + [c000003fa7babda0] [c0000000002084d0] sys_delete_module+0x1d0/0x2c0 + [c000003fa7babe20] [c00000000000b9d0] system_call+0x5c/0x68 + Instruction dump: + 3fc2001b fb81ffe0 fba1ffe8 fbe1fff8 7c7f1b78 7c9c2378 3bde4560 7fc3f378 + f8010010 f821ff81 486249a1 60000000 7c7d1b78 712a0002 40820084 + ---[ end trace 5774ef4dc2c98279 ]--- + +So this patch checks if kvmppc_uvmem_init actually allocated anything +before running kvmppc_uvmem_free. + +Fixes: ca9f4942670c ("KVM: PPC: Book3S HV: Support for running secure guests") +Cc: stable@vger.kernel.org # v5.5+ +Reported-by: Greg Kurz +Signed-off-by: Fabiano Rosas +Tested-by: Greg Kurz +Signed-off-by: Paul Mackerras +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/kvm/book3s_hv_uvmem.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/arch/powerpc/kvm/book3s_hv_uvmem.c ++++ b/arch/powerpc/kvm/book3s_hv_uvmem.c +@@ -806,6 +806,9 @@ out: + + void kvmppc_uvmem_free(void) + { ++ if (!kvmppc_uvmem_bitmap) ++ return; ++ + memunmap_pages(&kvmppc_uvmem_pgmap); + release_mem_region(kvmppc_uvmem_pgmap.res.start, + resource_size(&kvmppc_uvmem_pgmap.res)); diff --git a/queue-5.6/kvm-s390-vsie-fix-delivery-of-addressing-exceptions.patch b/queue-5.6/kvm-s390-vsie-fix-delivery-of-addressing-exceptions.patch new file mode 100644 index 00000000000..534152248e9 --- /dev/null +++ b/queue-5.6/kvm-s390-vsie-fix-delivery-of-addressing-exceptions.patch @@ -0,0 +1,50 @@ +From 4d4cee96fb7a3cc53702a9be8299bf525be4ee98 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Fri, 3 Apr 2020 17:30:47 +0200 +Subject: KVM: s390: vsie: Fix delivery of addressing exceptions + +From: David Hildenbrand + +commit 4d4cee96fb7a3cc53702a9be8299bf525be4ee98 upstream. + +Whenever we get an -EFAULT, we failed to read in guest 2 physical +address space. Such addressing exceptions are reported via a program +intercept to the nested hypervisor. + +We faked the intercept, we have to return to guest 2. 
Instead, right
+now we would be returning -EFAULT from the intercept handler, eventually
+crashing the VM.
+The correct thing to do is to return 1 as rc == 1 is the internal
+representation of "we have to go back into g2".
+
+Addressing exceptions can only happen if the g2->g3 page tables
+reference invalid g2 addresses (say, either a table or the final page is
+not accessible - so something that basically never happens in sane
+environments).
+
+Identified by manual code inspection.
+
+Fixes: a3508fbe9dc6 ("KVM: s390: vsie: initial support for nested virtualization")
+Cc: stable@vger.kernel.org # v4.8+
+Signed-off-by: David Hildenbrand
+Link: https://lore.kernel.org/r/20200403153050.20569-3-david@redhat.com
+Reviewed-by: Claudio Imbrenda
+Reviewed-by: Christian Borntraeger
+[borntraeger@de.ibm.com: fix patch description]
+Signed-off-by: Christian Borntraeger
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/s390/kvm/vsie.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/s390/kvm/vsie.c
++++ b/arch/s390/kvm/vsie.c
+@@ -1202,6 +1202,7 @@ static int vsie_run(struct kvm_vcpu *vcp
+ 		scb_s->iprcc = PGM_ADDRESSING;
+ 		scb_s->pgmilc = 4;
+ 		scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4);
++		rc = 1;
+ 	}
+ 	return rc;
+ }
diff --git a/queue-5.6/kvm-s390-vsie-fix-region-1-asce-sanity-shadow-address-checks.patch b/queue-5.6/kvm-s390-vsie-fix-region-1-asce-sanity-shadow-address-checks.patch
new file mode 100644
index 00000000000..56f6af758a9
--- /dev/null
+++ b/queue-5.6/kvm-s390-vsie-fix-region-1-asce-sanity-shadow-address-checks.patch
@@ -0,0 +1,56 @@
+From a1d032a49522cb5368e5dfb945a85899b4c74f65 Mon Sep 17 00:00:00 2001
+From: David Hildenbrand
+Date: Fri, 3 Apr 2020 17:30:46 +0200
+Subject: KVM: s390: vsie: Fix region 1 ASCE sanity shadow address checks
+
+From: David Hildenbrand
+
+commit a1d032a49522cb5368e5dfb945a85899b4c74f65 upstream.
+
+In case we have a region 1 the following calculation
+(31 + ((gmap->asce & _ASCE_TYPE_MASK) >> 2)*11)
+results in 64. As shifts beyond the size are undefined the compiler is
+free to use instructions like sllg. sllg will only use 6 bits of the
+shift value (here 64) resulting in no shift at all. That means that ALL
+addresses will be rejected.
+
+This can result in endless loops, e.g. when prefix cannot get mapped.
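
[ Editor's note: a quick sanity check of the arithmetic above, assuming the
  usual s390 ASCE type encoding (_ASCE_TYPE_MASK = 0x0c; REGION1 = 0x0c,
  REGION2 = 0x08, REGION3 = 0x04, SEGMENT = 0x00):

	/* 31 + ((asce & _ASCE_TYPE_MASK) >> 2) * 11 */
	segment table:	31 + 0 * 11 = 31	/* 2 GiB */
	region 3:	31 + 1 * 11 = 42	/* 4 TiB */
	region 2:	31 + 2 * 11 = 53	/* 8 PiB */
	region 1:	31 + 3 * 11 = 64	/* undefined shift */

  Only the region 1 case reaches the word size, which is why the hunk below
  simply skips the upper-bound check when asce_type == _ASCE_TYPE_REGION1. ]
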
+
+Fixes: 4be130a08420 ("s390/mm: add shadow gmap support")
+Tested-by: Janosch Frank
+Reported-by: Janosch Frank
+Cc: stable@vger.kernel.org # v4.8+
+Signed-off-by: David Hildenbrand
+Link: https://lore.kernel.org/r/20200403153050.20569-2-david@redhat.com
+Reviewed-by: Claudio Imbrenda
+Reviewed-by: Christian Borntraeger
+[borntraeger@de.ibm.com: fix patch description, remove WARN_ON_ONCE]
+Signed-off-by: Christian Borntraeger
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/s390/mm/gmap.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/s390/mm/gmap.c
++++ b/arch/s390/mm/gmap.c
+@@ -787,14 +787,18 @@ static void gmap_call_notifier(struct gm
+ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
+ 					     unsigned long gaddr, int level)
+ {
++	const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
+ 	unsigned long *table;
+ 
+ 	if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4))
+ 		return NULL;
+ 	if (gmap_is_shadow(gmap) && gmap->removed)
+ 		return NULL;
+-	if (gaddr & (-1UL << (31 + ((gmap->asce & _ASCE_TYPE_MASK) >> 2)*11)))
++
++	if (asce_type != _ASCE_TYPE_REGION1 &&
++	    gaddr & (-1UL << (31 + (asce_type >> 2) * 11)))
+ 		return NULL;
++
+ 	table = gmap->table;
+ 	switch (gmap->asce & _ASCE_TYPE_MASK) {
+ 	case _ASCE_TYPE_REGION1:
diff --git a/queue-5.6/kvm-vmx-add-a-trampoline-to-fix-vmread-error-handling.patch b/queue-5.6/kvm-vmx-add-a-trampoline-to-fix-vmread-error-handling.patch
new file mode 100644
index 00000000000..9410ae4b551
--- /dev/null
+++ b/queue-5.6/kvm-vmx-add-a-trampoline-to-fix-vmread-error-handling.patch
@@ -0,0 +1,149 @@
+From 842f4be95899df22b5843ba1a7c8cf37e831a6e8 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson
+Date: Thu, 26 Mar 2020 09:07:12 -0700
+Subject: KVM: VMX: Add a trampoline to fix VMREAD error handling
+
+From: Sean Christopherson
+
+commit 842f4be95899df22b5843ba1a7c8cf37e831a6e8 upstream.
+
+Add a hand coded assembly trampoline to preserve volatile registers
+across vmread_error(), and to handle the calling convention differences
+between 64-bit and 32-bit due to asmlinkage on vmread_error(). Pass
+@field and @fault on the stack when invoking the trampoline to avoid
+clobbering volatile registers in the context of the inline assembly.
+
+Calling vmread_error() directly from inline assembly is partially broken
+on 64-bit, and completely broken on 32-bit. On 64-bit, it will clobber
+%rdi and %rsi (used to pass @field and @fault) and any volatile regs
+written by vmread_error(). On 32-bit, asmlinkage means vmread_error()
+expects the parameters to be passed on the stack, not via regs.
+
+Opportunistically zero out the result in the trampoline to save a few
+bytes of code for every VMREAD. A happy side effect of the trampoline
+is that the inline code footprint is reduced by three bytes on 64-bit
+due to PUSH/POP being more efficient (in terms of opcode bytes) than MOV.
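
[ Editor's note: "asmlinkage" is what makes the 32-bit case special here;
  on x86-32 it expands to __attribute__((regparm(0))), i.e.:

	asmlinkage void vmread_error(unsigned long field, bool fault);
	/* x86-32: both arguments arrive on the stack, never in registers */

  which is also why the hunk below declares the trampoline with an explicit
  regparm(0) attribute rather than relying on the default convention. ]
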
+ +Fixes: 6e2020977e3e6 ("KVM: VMX: Add error handling to VMREAD helper") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20200326160712.28803-1-sean.j.christopherson@intel.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/vmx/ops.h | 28 ++++++++++++++++----- + arch/x86/kvm/vmx/vmenter.S | 58 +++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 79 insertions(+), 7 deletions(-) + +--- a/arch/x86/kvm/vmx/ops.h ++++ b/arch/x86/kvm/vmx/ops.h +@@ -12,7 +12,8 @@ + + #define __ex(x) __kvm_handle_fault_on_reboot(x) + +-asmlinkage void vmread_error(unsigned long field, bool fault); ++__attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field, ++ bool fault); + void vmwrite_error(unsigned long field, unsigned long value); + void vmclear_error(struct vmcs *vmcs, u64 phys_addr); + void vmptrld_error(struct vmcs *vmcs, u64 phys_addr); +@@ -70,15 +71,28 @@ static __always_inline unsigned long __v + asm volatile("1: vmread %2, %1\n\t" + ".byte 0x3e\n\t" /* branch taken hint */ + "ja 3f\n\t" +- "mov %2, %%" _ASM_ARG1 "\n\t" +- "xor %%" _ASM_ARG2 ", %%" _ASM_ARG2 "\n\t" +- "2: call vmread_error\n\t" +- "xor %k1, %k1\n\t" ++ ++ /* ++ * VMREAD failed. Push '0' for @fault, push the failing ++ * @field, and bounce through the trampoline to preserve ++ * volatile registers. ++ */ ++ "push $0\n\t" ++ "push %2\n\t" ++ "2:call vmread_error_trampoline\n\t" ++ ++ /* ++ * Unwind the stack. Note, the trampoline zeros out the ++ * memory for @fault so that the result is '0' on error. ++ */ ++ "pop %2\n\t" ++ "pop %1\n\t" + "3:\n\t" + ++ /* VMREAD faulted. As above, except push '1' for @fault. */ + ".pushsection .fixup, \"ax\"\n\t" +- "4: mov %2, %%" _ASM_ARG1 "\n\t" +- "mov $1, %%" _ASM_ARG2 "\n\t" ++ "4: push $1\n\t" ++ "push %2\n\t" + "jmp 2b\n\t" + ".popsection\n\t" + _ASM_EXTABLE(1b, 4b) +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -234,3 +234,61 @@ SYM_FUNC_START(__vmx_vcpu_run) + 2: mov $1, %eax + jmp 1b + SYM_FUNC_END(__vmx_vcpu_run) ++ ++/** ++ * vmread_error_trampoline - Trampoline from inline asm to vmread_error() ++ * @field: VMCS field encoding that failed ++ * @fault: %true if the VMREAD faulted, %false if it failed ++ ++ * Save and restore volatile registers across a call to vmread_error(). Note, ++ * all parameters are passed on the stack. ++ */ ++SYM_FUNC_START(vmread_error_trampoline) ++ push %_ASM_BP ++ mov %_ASM_SP, %_ASM_BP ++ ++ push %_ASM_AX ++ push %_ASM_CX ++ push %_ASM_DX ++#ifdef CONFIG_X86_64 ++ push %rdi ++ push %rsi ++ push %r8 ++ push %r9 ++ push %r10 ++ push %r11 ++#endif ++#ifdef CONFIG_X86_64 ++ /* Load @field and @fault to arg1 and arg2 respectively. */ ++ mov 3*WORD_SIZE(%rbp), %_ASM_ARG2 ++ mov 2*WORD_SIZE(%rbp), %_ASM_ARG1 ++#else ++ /* Parameters are passed on the stack for 32-bit (see asmlinkage). */ ++ push 3*WORD_SIZE(%ebp) ++ push 2*WORD_SIZE(%ebp) ++#endif ++ ++ call vmread_error ++ ++#ifndef CONFIG_X86_64 ++ add $8, %esp ++#endif ++ ++ /* Zero out @fault, which will be popped into the result register. 
*/ ++ _ASM_MOV $0, 3*WORD_SIZE(%_ASM_BP) ++ ++#ifdef CONFIG_X86_64 ++ pop %r11 ++ pop %r10 ++ pop %r9 ++ pop %r8 ++ pop %rsi ++ pop %rdi ++#endif ++ pop %_ASM_DX ++ pop %_ASM_CX ++ pop %_ASM_AX ++ pop %_ASM_BP ++ ++ ret ++SYM_FUNC_END(vmread_error_trampoline) diff --git a/queue-5.6/kvm-vmx-always-vmclear-in-use-vmcses-during-crash-with-kexec-support.patch b/queue-5.6/kvm-vmx-always-vmclear-in-use-vmcses-during-crash-with-kexec-support.patch new file mode 100644 index 00000000000..f10b7347b21 --- /dev/null +++ b/queue-5.6/kvm-vmx-always-vmclear-in-use-vmcses-during-crash-with-kexec-support.patch @@ -0,0 +1,180 @@ +From 31603d4fc2bb4f0815245d496cb970b27b4f636a Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Sat, 21 Mar 2020 12:37:49 -0700 +Subject: KVM: VMX: Always VMCLEAR in-use VMCSes during crash with kexec support + +From: Sean Christopherson + +commit 31603d4fc2bb4f0815245d496cb970b27b4f636a upstream. + +VMCLEAR all in-use VMCSes during a crash, even if kdump's NMI shootdown +interrupted a KVM update of the percpu in-use VMCS list. + +Because NMIs are not blocked by disabling IRQs, it's possible that +crash_vmclear_local_loaded_vmcss() could be called while the percpu list +of VMCSes is being modified, e.g. in the middle of list_add() in +vmx_vcpu_load_vmcs(). This potential corner case was called out in the +original commit[*], but the analysis of its impact was wrong. + +Skipping the VMCLEARs is wrong because it all but guarantees that a +loaded, and therefore cached, VMCS will live across kexec and corrupt +memory in the new kernel. Corruption will occur because the CPU's VMCS +cache is non-coherent, i.e. not snooped, and so the writeback of VMCS +memory on its eviction will overwrite random memory in the new kernel. +The VMCS will live because the NMI shootdown also disables VMX, i.e. the +in-progress VMCLEAR will #UD, and existing Intel CPUs do not flush the +VMCS cache on VMXOFF. + +Furthermore, interrupting list_add() and list_del() is safe due to +crash_vmclear_local_loaded_vmcss() using forward iteration. list_add() +ensures the new entry is not visible to forward iteration unless the +entire add completes, via WRITE_ONCE(prev->next, new). A bad "prev" +pointer could be observed if the NMI shootdown interrupted list_del() or +list_add(), but list_for_each_entry() does not consume ->prev. + +In addition to removing the temporary disabling of VMCLEAR, open code +loaded_vmcs_init() in __loaded_vmcs_clear() and reorder VMCLEAR so that +the VMCS is deleted from the list only after it's been VMCLEAR'd. +Deleting the VMCS before VMCLEAR would allow a race where the NMI +shootdown could arrive between list_del() and vmcs_clear() and thus +neither flow would execute a successful VMCLEAR. Alternatively, more +code could be moved into loaded_vmcs_init(), but that gets rather silly +as the only other user, alloc_loaded_vmcs(), doesn't need the smp_wmb() +and would need to work around the list_del(). + +Update the smp_*() comments related to the list manipulation, and +opportunistically reword them to improve clarity. 
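
[ Editor's note: the forward-iteration argument in a nutshell. list_add()
  publishes a new entry only with its final store; roughly what the generic
  helper in include/linux/list.h does (a sketch, not a verbatim copy):

	next->prev = new;
	new->next = next;
	new->prev = prev;
	WRITE_ONCE(prev->next, new);	/* entry becomes visible here */

  An NMI that interrupts this sequence and then walks the list forward
  either sees the fully linked entry or does not see it at all, and
  list_for_each_entry() never dereferences ->prev. ]
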
+ +[*] https://patchwork.kernel.org/patch/1675731/#3720461 + +Fixes: 8f536b7697a0 ("KVM: VMX: provide the vmclear function and a bitmap to support VMCLEAR in kdump") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20200321193751.24985-2-sean.j.christopherson@intel.com> +Reviewed-by: Vitaly Kuznetsov +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/vmx/vmx.c | 67 +++++++++++-------------------------------------- + 1 file changed, 16 insertions(+), 51 deletions(-) + +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -666,43 +666,15 @@ void loaded_vmcs_init(struct loaded_vmcs + } + + #ifdef CONFIG_KEXEC_CORE +-/* +- * This bitmap is used to indicate whether the vmclear +- * operation is enabled on all cpus. All disabled by +- * default. +- */ +-static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE; +- +-static inline void crash_enable_local_vmclear(int cpu) +-{ +- cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap); +-} +- +-static inline void crash_disable_local_vmclear(int cpu) +-{ +- cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap); +-} +- +-static inline int crash_local_vmclear_enabled(int cpu) +-{ +- return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap); +-} +- + static void crash_vmclear_local_loaded_vmcss(void) + { + int cpu = raw_smp_processor_id(); + struct loaded_vmcs *v; + +- if (!crash_local_vmclear_enabled(cpu)) +- return; +- + list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu), + loaded_vmcss_on_cpu_link) + vmcs_clear(v->vmcs); + } +-#else +-static inline void crash_enable_local_vmclear(int cpu) { } +-static inline void crash_disable_local_vmclear(int cpu) { } + #endif /* CONFIG_KEXEC_CORE */ + + static void __loaded_vmcs_clear(void *arg) +@@ -714,19 +686,24 @@ static void __loaded_vmcs_clear(void *ar + return; /* vcpu migration can race with cpu offline */ + if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs) + per_cpu(current_vmcs, cpu) = NULL; +- crash_disable_local_vmclear(cpu); ++ ++ vmcs_clear(loaded_vmcs->vmcs); ++ if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched) ++ vmcs_clear(loaded_vmcs->shadow_vmcs); ++ + list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link); + + /* +- * we should ensure updating loaded_vmcs->loaded_vmcss_on_cpu_link +- * is before setting loaded_vmcs->vcpu to -1 which is done in +- * loaded_vmcs_init. Otherwise, other cpu can see vcpu = -1 fist +- * then adds the vmcs into percpu list before it is deleted. ++ * Ensure all writes to loaded_vmcs, including deleting it from its ++ * current percpu list, complete before setting loaded_vmcs->vcpu to ++ * -1, otherwise a different cpu can see vcpu == -1 first and add ++ * loaded_vmcs to its percpu list before it's deleted from this cpu's ++ * list. Pairs with the smp_rmb() in vmx_vcpu_load_vmcs(). + */ + smp_wmb(); + +- loaded_vmcs_init(loaded_vmcs); +- crash_enable_local_vmclear(cpu); ++ loaded_vmcs->cpu = -1; ++ loaded_vmcs->launched = 0; + } + + void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs) +@@ -1345,18 +1322,17 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu + if (!already_loaded) { + loaded_vmcs_clear(vmx->loaded_vmcs); + local_irq_disable(); +- crash_disable_local_vmclear(cpu); + + /* +- * Read loaded_vmcs->cpu should be before fetching +- * loaded_vmcs->loaded_vmcss_on_cpu_link. +- * See the comments in __loaded_vmcs_clear(). 
++ * Ensure loaded_vmcs->cpu is read before adding loaded_vmcs to ++ * this cpu's percpu list, otherwise it may not yet be deleted ++ * from its previous cpu's percpu list. Pairs with the ++ * smb_wmb() in __loaded_vmcs_clear(). + */ + smp_rmb(); + + list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link, + &per_cpu(loaded_vmcss_on_cpu, cpu)); +- crash_enable_local_vmclear(cpu); + local_irq_enable(); + } + +@@ -2292,17 +2268,6 @@ static int hardware_enable(void) + INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); + spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); + +- /* +- * Now we can enable the vmclear operation in kdump +- * since the loaded_vmcss_on_cpu list on this cpu +- * has been initialized. +- * +- * Though the cpu is not in VMX operation now, there +- * is no problem to enable the vmclear operation +- * for the loaded_vmcss_on_cpu list is empty! +- */ +- crash_enable_local_vmclear(cpu); +- + kvm_cpu_vmxon(phys_addr); + if (enable_ept) + ept_sync_global(); diff --git a/queue-5.6/kvm-vmx-fix-crash-cleanup-when-kvm-wasn-t-used.patch b/queue-5.6/kvm-vmx-fix-crash-cleanup-when-kvm-wasn-t-used.patch new file mode 100644 index 00000000000..de01eba9644 --- /dev/null +++ b/queue-5.6/kvm-vmx-fix-crash-cleanup-when-kvm-wasn-t-used.patch @@ -0,0 +1,75 @@ +From dbef2808af6c594922fe32833b30f55f35e9da6d Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Wed, 1 Apr 2020 10:13:48 +0200 +Subject: KVM: VMX: fix crash cleanup when KVM wasn't used + +From: Vitaly Kuznetsov + +commit dbef2808af6c594922fe32833b30f55f35e9da6d upstream. + +If KVM wasn't used at all before we crash the cleanup procedure fails with + BUG: unable to handle page fault for address: ffffffffffffffc8 + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD 23215067 P4D 23215067 PUD 23217067 PMD 0 + Oops: 0000 [#8] SMP PTI + CPU: 0 PID: 3542 Comm: bash Kdump: loaded Tainted: G D 5.6.0-rc2+ #823 + RIP: 0010:crash_vmclear_local_loaded_vmcss.cold+0x19/0x51 [kvm_intel] + +The root cause is that loaded_vmcss_on_cpu list is not yet initialized, +we initialize it in hardware_enable() but this only happens when we start +a VM. + +Previously, we used to have a bitmap with enabled CPUs and that was +preventing [masking] the issue. + +Initialized loaded_vmcss_on_cpu list earlier, right before we assign +crash_vmclear_loaded_vmcss pointer. blocked_vcpu_on_cpu list and +blocked_vcpu_on_cpu_lock are moved altogether for consistency. 
+
+Fixes: 31603d4fc2bb ("KVM: VMX: Always VMCLEAR in-use VMCSes during crash with kexec support")
+Signed-off-by: Vitaly Kuznetsov
+Message-Id: <20200401081348.1345307-1-vkuznets@redhat.com>
+Reviewed-by: Sean Christopherson
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kvm/vmx/vmx.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -2264,10 +2264,6 @@ static int hardware_enable(void)
+ 	    !hv_get_vp_assist_page(cpu))
+ 		return -EFAULT;
+ 
+-	INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+-	INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
+-	spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+-
+ 	kvm_cpu_vmxon(phys_addr);
+ 	if (enable_ept)
+ 		ept_sync_global();
+@@ -8025,7 +8021,7 @@ module_exit(vmx_exit);
+ 
+ static int __init vmx_init(void)
+ {
+-	int r;
++	int r, cpu;
+ 
+ #if IS_ENABLED(CONFIG_HYPERV)
+ 	/*
+@@ -8079,6 +8075,12 @@ static int __init vmx_init(void)
+ 		return r;
+ 	}
+ 
++	for_each_possible_cpu(cpu) {
++		INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
++		INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
++		spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
++	}
++
+ #ifdef CONFIG_KEXEC_CORE
+ 	rcu_assign_pointer(crash_vmclear_loaded_vmcss,
+ 			   crash_vmclear_local_loaded_vmcss);
diff --git a/queue-5.6/kvm-x86-allocate-new-rmap-and-large-page-tracking-when-moving-memslot.patch b/queue-5.6/kvm-x86-allocate-new-rmap-and-large-page-tracking-when-moving-memslot.patch
new file mode 100644
index 00000000000..a4f3a4163de
--- /dev/null
+++ b/queue-5.6/kvm-x86-allocate-new-rmap-and-large-page-tracking-when-moving-memslot.patch
@@ -0,0 +1,102 @@
+From edd4fa37baa6ee8e44dc65523b27bd6fe44c94de Mon Sep 17 00:00:00 2001
+From: Sean Christopherson
+Date: Tue, 18 Feb 2020 13:07:15 -0800
+Subject: KVM: x86: Allocate new rmap and large page tracking when moving memslot
+
+From: Sean Christopherson
+
+commit edd4fa37baa6ee8e44dc65523b27bd6fe44c94de upstream.
+
+Reallocate a rmap array and recalculate large page compatibility when
+moving an existing memslot to correctly handle the alignment properties
+of the new memslot. The number of rmap entries required at each level
+is dependent on the alignment of the memslot's base gfn with respect to
+that level, e.g. moving a large-page aligned memslot so that it becomes
+unaligned will increase the number of rmap entries needed at the now
+unaligned level.
+
+Not updating the rmap array is the most obvious bug, as KVM accesses
+garbage data beyond the end of the rmap. KVM interprets the bad data as
+pointers, leading to non-canonical #GPs, unexpected #PFs, etc...
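
[ Editor's note: a worked example of the alignment dependency described
  above, at the 2 MiB level (512 pages per large page): a 1024-page memslot
  at base_gfn 0 spans large-page frames 0-1, so two rmap entries suffice;
  move the same slot to base_gfn 1 and it touches frames 0, 1 and 2, so
  three entries are needed. Roughly the count the sizing code has to redo
  (a sketch, not the exact kernel helper):

	entries = (base_gfn + npages - 1) / 512 - base_gfn / 512 + 1;

  Keeping the old two-entry array after such a move is exactly the
  out-of-bounds access behind the trace below. ]
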
+ + general protection fault: 0000 [#1] SMP + CPU: 0 PID: 1909 Comm: move_memory_reg Not tainted 5.4.0-rc7+ #139 + Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 + RIP: 0010:rmap_get_first+0x37/0x50 [kvm] + Code: <48> 8b 3b 48 85 ff 74 ec e8 6c f4 ff ff 85 c0 74 e3 48 89 d8 5b c3 + RSP: 0018:ffffc9000021bbc8 EFLAGS: 00010246 + RAX: ffff00617461642e RBX: ffff00617461642e RCX: 0000000000000012 + RDX: ffff88827400f568 RSI: ffffc9000021bbe0 RDI: ffff88827400f570 + RBP: 0010000000000000 R08: ffffc9000021bd00 R09: ffffc9000021bda8 + R10: ffffc9000021bc48 R11: 0000000000000000 R12: 0030000000000000 + R13: 0000000000000000 R14: ffff88827427d700 R15: ffffc9000021bce8 + FS: 00007f7eda014700(0000) GS:ffff888277a00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007f7ed9216ff8 CR3: 0000000274391003 CR4: 0000000000162eb0 + Call Trace: + kvm_mmu_slot_set_dirty+0xa1/0x150 [kvm] + __kvm_set_memory_region.part.64+0x559/0x960 [kvm] + kvm_set_memory_region+0x45/0x60 [kvm] + kvm_vm_ioctl+0x30f/0x920 [kvm] + do_vfs_ioctl+0xa1/0x620 + ksys_ioctl+0x66/0x70 + __x64_sys_ioctl+0x16/0x20 + do_syscall_64+0x4c/0x170 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + RIP: 0033:0x7f7ed9911f47 + Code: <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 21 6f 2c 00 f7 d8 64 89 01 48 + RSP: 002b:00007ffc00937498 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 + RAX: ffffffffffffffda RBX: 0000000001ab0010 RCX: 00007f7ed9911f47 + RDX: 0000000001ab1350 RSI: 000000004020ae46 RDI: 0000000000000004 + RBP: 000000000000000a R08: 0000000000000000 R09: 00007f7ed9214700 + R10: 00007f7ed92149d0 R11: 0000000000000246 R12: 00000000bffff000 + R13: 0000000000000003 R14: 00007f7ed9215000 R15: 0000000000000000 + Modules linked in: kvm_intel kvm irqbypass + ---[ end trace 0c5f570b3358ca89 ]--- + +The disallow_lpage tracking is more subtle. Failure to update results +in KVM creating large pages when it shouldn't, either due to stale data +or again due to indexing beyond the end of the metadata arrays, which +can lead to memory corruption and/or leaking data to guest/userspace. + +Note, the arrays for the old memslot are freed by the unconditional call +to kvm_free_memslot() in __kvm_set_memory_region(). + +Fixes: 05da45583de9b ("KVM: MMU: large page support") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Reviewed-by: Peter Xu +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/x86.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -9873,6 +9873,13 @@ int kvm_arch_create_memslot(struct kvm * + { + int i; + ++ /* ++ * Clear out the previous array pointers for the KVM_MR_MOVE case. The ++ * old arrays will be freed by __kvm_set_memory_region() if installing ++ * the new memslot is successful. 
++	 */
++	memset(&slot->arch, 0, sizeof(slot->arch));
++
+ 	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
+ 		struct kvm_lpage_info *linfo;
+ 		unsigned long ugfn;
+@@ -9954,6 +9961,10 @@ int kvm_arch_prepare_memory_region(struc
+ 				const struct kvm_userspace_memory_region *mem,
+ 				enum kvm_mr_change change)
+ {
++	if (change == KVM_MR_MOVE)
++		return kvm_arch_create_memslot(kvm, memslot,
++					       mem->memory_size >> PAGE_SHIFT);
++
+ 	return 0;
+ }
+ 
diff --git a/queue-5.6/kvm-x86-gracefully-handle-__vmalloc-failure-during-vm-allocation.patch b/queue-5.6/kvm-x86-gracefully-handle-__vmalloc-failure-during-vm-allocation.patch
new file mode 100644
index 00000000000..6da12a78039
--- /dev/null
+++ b/queue-5.6/kvm-x86-gracefully-handle-__vmalloc-failure-during-vm-allocation.patch
@@ -0,0 +1,50 @@
+From d18b2f43b9147c8005ae0844fb445d8cc6a87e31 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson
+Date: Sun, 26 Jan 2020 16:41:11 -0800
+Subject: KVM: x86: Gracefully handle __vmalloc() failure during VM allocation
+
+From: Sean Christopherson
+
+commit d18b2f43b9147c8005ae0844fb445d8cc6a87e31 upstream.
+
+Check the result of __vmalloc() to avoid dereferencing a NULL pointer in
+the event that allocation fails.
+
+Fixes: d1e5b0e98ea27 ("kvm: Make VM ioctl do valloc for some archs")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson
+Reviewed-by: Vitaly Kuznetsov
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kvm/svm.c | 4 ++++
+ arch/x86/kvm/vmx/vmx.c | 4 ++++
+ 2 files changed, 8 insertions(+)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1943,6 +1943,10 @@ static struct kvm *svm_vm_alloc(void)
+ 	struct kvm_svm *kvm_svm = __vmalloc(sizeof(struct kvm_svm),
+ 					    GFP_KERNEL_ACCOUNT | __GFP_ZERO,
+ 					    PAGE_KERNEL);
++
++	if (!kvm_svm)
++		return NULL;
++
+ 	return &kvm_svm->kvm;
+ }
+ 
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -6671,6 +6671,10 @@ static struct kvm *vmx_vm_alloc(void)
+ 	struct kvm_vmx *kvm_vmx = __vmalloc(sizeof(struct kvm_vmx),
+ 					    GFP_KERNEL_ACCOUNT | __GFP_ZERO,
+ 					    PAGE_KERNEL);
++
++	if (!kvm_vmx)
++		return NULL;
++
+ 	return &kvm_vmx->kvm;
+ }
+ 
diff --git a/queue-5.6/mtd-rawnand-cadence-change-bad-block-marker-size.patch b/queue-5.6/mtd-rawnand-cadence-change-bad-block-marker-size.patch
new file mode 100644
index 00000000000..94fd3d12a59
--- /dev/null
+++ b/queue-5.6/mtd-rawnand-cadence-change-bad-block-marker-size.patch
@@ -0,0 +1,42 @@
+From 9bf1903bed7a2e84f5a8deedb38f7e0ac5e8bfc6 Mon Sep 17 00:00:00 2001
+From: Piotr Sroka
+Date: Mon, 10 Feb 2020 10:55:27 +0100
+Subject: mtd: rawnand: cadence: change bad block marker size
+
+From: Piotr Sroka
+
+commit 9bf1903bed7a2e84f5a8deedb38f7e0ac5e8bfc6 upstream.
+
+Increase bad block marker size from one byte to two bytes.
+Bad block marker is handled by skip bytes feature of HPNFC.
+Controller expects this value to be an even number.
+ +Fixes: ec4ba01e894d ("mtd: rawnand: Add new Cadence NAND driver to MTD subsystem") +Cc: stable@vger.kernel.org +Signed-off-by: Piotr Sroka +Signed-off-by: Miquel Raynal +Link: https://lore.kernel.org/linux-mtd/1581328530-29966-3-git-send-email-piotrs@cadence.com +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mtd/nand/raw/cadence-nand-controller.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +--- a/drivers/mtd/nand/raw/cadence-nand-controller.c ++++ b/drivers/mtd/nand/raw/cadence-nand-controller.c +@@ -2603,12 +2603,9 @@ int cadence_nand_attach_chip(struct nand + chip->options |= NAND_NO_SUBPAGE_WRITE; + + cdns_chip->bbm_offs = chip->badblockpos; +- if (chip->options & NAND_BUSWIDTH_16) { +- cdns_chip->bbm_offs &= ~0x01; +- cdns_chip->bbm_len = 2; +- } else { +- cdns_chip->bbm_len = 1; +- } ++ cdns_chip->bbm_offs &= ~0x01; ++ /* this value should be even number */ ++ cdns_chip->bbm_len = 2; + + ret = nand_ecc_choose_conf(chip, + &cdns_ctrl->ecc_caps, diff --git a/queue-5.6/mtd-rawnand-cadence-fix-the-calculation-of-the-avaialble-oob-size.patch b/queue-5.6/mtd-rawnand-cadence-fix-the-calculation-of-the-avaialble-oob-size.patch new file mode 100644 index 00000000000..289dedaa512 --- /dev/null +++ b/queue-5.6/mtd-rawnand-cadence-fix-the-calculation-of-the-avaialble-oob-size.patch @@ -0,0 +1,43 @@ +From e4578af0354176ff6b4ae78b9998b4f479f7c31c Mon Sep 17 00:00:00 2001 +From: Piotr Sroka +Date: Mon, 10 Feb 2020 10:55:26 +0100 +Subject: mtd: rawnand: cadence: fix the calculation of the avaialble OOB size + +From: Piotr Sroka + +commit e4578af0354176ff6b4ae78b9998b4f479f7c31c upstream. + +The value of cdns_chip->sector_count is not known at the moment +of the derivation of ecc_size, leading to a zero value. Fix +this by assigning ecc_size later in the code. + +Fixes: ec4ba01e894d ("mtd: rawnand: Add new Cadence NAND driver to MTD subsystem") +Cc: stable@vger.kernel.org +Signed-off-by: Piotr Sroka +Signed-off-by: Miquel Raynal +Link: https://lore.kernel.org/linux-mtd/1581328530-29966-2-git-send-email-piotrs@cadence.com +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mtd/nand/raw/cadence-nand-controller.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/mtd/nand/raw/cadence-nand-controller.c ++++ b/drivers/mtd/nand/raw/cadence-nand-controller.c +@@ -2585,7 +2585,7 @@ int cadence_nand_attach_chip(struct nand + { + struct cdns_nand_ctrl *cdns_ctrl = to_cdns_nand_ctrl(chip->controller); + struct cdns_nand_chip *cdns_chip = to_cdns_nand_chip(chip); +- u32 ecc_size = cdns_chip->sector_count * chip->ecc.bytes; ++ u32 ecc_size; + struct mtd_info *mtd = nand_to_mtd(chip); + u32 max_oob_data_size; + int ret; +@@ -2625,6 +2625,7 @@ int cadence_nand_attach_chip(struct nand + /* Error correction configuration. 
*/
+ 	cdns_chip->sector_size = chip->ecc.size;
+ 	cdns_chip->sector_count = mtd->writesize / cdns_chip->sector_size;
++	ecc_size = cdns_chip->sector_count * chip->ecc.bytes;
+ 
+ 	cdns_chip->avail_oob_size = mtd->oobsize - ecc_size;
+ 
diff --git a/queue-5.6/mtd-rawnand-cadence-reinit-completion-before-executing-a-new-command.patch b/queue-5.6/mtd-rawnand-cadence-reinit-completion-before-executing-a-new-command.patch
new file mode 100644
index 00000000000..f4b47b287b7
--- /dev/null
+++ b/queue-5.6/mtd-rawnand-cadence-reinit-completion-before-executing-a-new-command.patch
@@ -0,0 +1,33 @@
+From 0d7d6c8183aadb1dcc13f415941404a7913b46b3 Mon Sep 17 00:00:00 2001
+From: Piotr Sroka
+Date: Mon, 10 Feb 2020 10:55:28 +0100
+Subject: mtd: rawnand: cadence: reinit completion before executing a new command
+
+From: Piotr Sroka
+
+commit 0d7d6c8183aadb1dcc13f415941404a7913b46b3 upstream.
+
+Reinit the completion object before executing CDMA command to make sure
+the 'done' flag is OK.
+
+Fixes: ec4ba01e894d ("mtd: rawnand: Add new Cadence NAND driver to MTD subsystem")
+Cc: stable@vger.kernel.org
+Signed-off-by: Piotr Sroka
+Signed-off-by: Miquel Raynal
+Link: https://lore.kernel.org/linux-mtd/1581328530-29966-4-git-send-email-piotrs@cadence.com
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/mtd/nand/raw/cadence-nand-controller.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/mtd/nand/raw/cadence-nand-controller.c
++++ b/drivers/mtd/nand/raw/cadence-nand-controller.c
+@@ -997,6 +997,7 @@ static int cadence_nand_cdma_send(struct
+ 		return status;
+ 
+ 	cadence_nand_reset_irq(cdns_ctrl);
++	reinit_completion(&cdns_ctrl->complete);
+ 
+ 	writel_relaxed((u32)cdns_ctrl->dma_cdma_desc,
+ 		       cdns_ctrl->reg + CMD_REG2);
diff --git a/queue-5.6/mtd-spinand-do-not-erase-the-block-before-writing-a-bad-block-marker.patch b/queue-5.6/mtd-spinand-do-not-erase-the-block-before-writing-a-bad-block-marker.patch
new file mode 100644
index 00000000000..30ccd177ed8
--- /dev/null
+++ b/queue-5.6/mtd-spinand-do-not-erase-the-block-before-writing-a-bad-block-marker.patch
@@ -0,0 +1,50 @@
+From b645ad39d56846618704e463b24bb994c9585c7f Mon Sep 17 00:00:00 2001
+From: Frieder Schrempf
+Date: Tue, 18 Feb 2020 10:05:35 +0000
+Subject: mtd: spinand: Do not erase the block before writing a bad block marker
+
+From: Frieder Schrempf
+
+commit b645ad39d56846618704e463b24bb994c9585c7f upstream.
+
+Currently when marking a block, we use spinand_erase_op() to erase
+the block before writing the marker to the OOB area. Doing so without
+waiting for the operation to finish can lead to the marking failing
+silently and no bad block marker being written to the flash.
+
+In fact we don't need to do an erase at all before writing the BBM.
+The ECC is disabled for raw accesses to the OOB data and we don't
+need to work around any issues with chips reporting ECC errors as it
+is known to be the case for raw NAND.
+
+Fixes: 7529df465248 ("mtd: nand: Add core infrastructure to support SPI NANDs")
+Cc: stable@vger.kernel.org
+Signed-off-by: Frieder Schrempf
+Reviewed-by: Boris Brezillon
+Signed-off-by: Miquel Raynal
+Link: https://lore.kernel.org/linux-mtd/20200218100432.32433-4-frieder.schrempf@kontron.de
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/mtd/nand/spi/core.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+--- a/drivers/mtd/nand/spi/core.c
++++ b/drivers/mtd/nand/spi/core.c
+@@ -612,7 +612,6 @@ static int spinand_markbad(struct nand_d
+ 	};
+ 	int ret;
+ 
+-	/* Erase block before marking it bad.
*/ + ret = spinand_select_target(spinand, pos->target); + if (ret) + return ret; +@@ -621,8 +620,6 @@ static int spinand_markbad(struct nand_d + if (ret) + return ret; + +- spinand_erase_op(spinand, pos); +- + return spinand_write_page(spinand, &req); + } + diff --git a/queue-5.6/mtd-spinand-stop-using-spinand-oobbuf-for-buffering-bad-block-markers.patch b/queue-5.6/mtd-spinand-stop-using-spinand-oobbuf-for-buffering-bad-block-markers.patch new file mode 100644 index 00000000000..2d5d6cc6354 --- /dev/null +++ b/queue-5.6/mtd-spinand-stop-using-spinand-oobbuf-for-buffering-bad-block-markers.patch @@ -0,0 +1,85 @@ +From 2148937501ee3d663e0010e519a553fea67ad103 Mon Sep 17 00:00:00 2001 +From: Frieder Schrempf +Date: Tue, 18 Feb 2020 10:05:14 +0000 +Subject: mtd: spinand: Stop using spinand->oobbuf for buffering bad block markers + +From: Frieder Schrempf + +commit 2148937501ee3d663e0010e519a553fea67ad103 upstream. + +For reading and writing the bad block markers, spinand->oobbuf is +currently used as a buffer for the marker bytes. During the +underlying read and write operations to actually get/set the content +of the OOB area, the content of spinand->oobbuf is reused and changed +by accessing it through spinand->oobbuf and/or spinand->databuf. + +This is a flaw in the original design of the SPI NAND core and at the +latest from 13c15e07eedf ("mtd: spinand: Handle the case where +PROGRAM LOAD does not reset the cache") on, it results in not having +the bad block marker written at all, as the spinand->oobbuf is +cleared to 0xff after setting the marker bytes to zero. + +To fix it, we now just store the two bytes for the marker on the +stack and let the read/write operations copy it from/to the page +buffer later. + +Fixes: 7529df465248 ("mtd: nand: Add core infrastructure to support SPI NANDs") +Cc: stable@vger.kernel.org +Signed-off-by: Frieder Schrempf +Reviewed-by: Boris Brezillon +Signed-off-by: Miquel Raynal +Link: https://lore.kernel.org/linux-mtd/20200218100432.32433-2-frieder.schrempf@kontron.de +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mtd/nand/spi/core.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +--- a/drivers/mtd/nand/spi/core.c ++++ b/drivers/mtd/nand/spi/core.c +@@ -568,18 +568,18 @@ static int spinand_mtd_write(struct mtd_ + static bool spinand_isbad(struct nand_device *nand, const struct nand_pos *pos) + { + struct spinand_device *spinand = nand_to_spinand(nand); ++ u8 marker[2] = { }; + struct nand_page_io_req req = { + .pos = *pos, +- .ooblen = 2, ++ .ooblen = sizeof(marker), + .ooboffs = 0, +- .oobbuf.in = spinand->oobbuf, ++ .oobbuf.in = marker, + .mode = MTD_OPS_RAW, + }; + +- memset(spinand->oobbuf, 0, 2); + spinand_select_target(spinand, pos->target); + spinand_read_page(spinand, &req, false); +- if (spinand->oobbuf[0] != 0xff || spinand->oobbuf[1] != 0xff) ++ if (marker[0] != 0xff || marker[1] != 0xff) + return true; + + return false; +@@ -603,11 +603,12 @@ static int spinand_mtd_block_isbad(struc + static int spinand_markbad(struct nand_device *nand, const struct nand_pos *pos) + { + struct spinand_device *spinand = nand_to_spinand(nand); ++ u8 marker[2] = { }; + struct nand_page_io_req req = { + .pos = *pos, + .ooboffs = 0, +- .ooblen = 2, +- .oobbuf.out = spinand->oobbuf, ++ .ooblen = sizeof(marker), ++ .oobbuf.out = marker, + }; + int ret; + +@@ -622,7 +623,6 @@ static int spinand_markbad(struct nand_d + + spinand_erase_op(spinand, pos); + +- memset(spinand->oobbuf, 0, 2); + return spinand_write_page(spinand, &req); + } + diff 
--git a/queue-5.6/platform-x86-asus-wmi-support-laptops-where-the-first-battery-is-named-batt.patch b/queue-5.6/platform-x86-asus-wmi-support-laptops-where-the-first-battery-is-named-batt.patch
new file mode 100644
index 00000000000..4140e4d773e
--- /dev/null
+++ b/queue-5.6/platform-x86-asus-wmi-support-laptops-where-the-first-battery-is-named-batt.patch
@@ -0,0 +1,41 @@
+From 6b3586d45bba14f6912f37488090c37a3710e7b4 Mon Sep 17 00:00:00 2001
+From: Kristian Klausen
+Date: Tue, 3 Mar 2020 19:02:15 +0100
+Subject: platform/x86: asus-wmi: Support laptops where the first battery is named BATT
+
+From: Kristian Klausen
+
+commit 6b3586d45bba14f6912f37488090c37a3710e7b4 upstream.
+
+The WMI method to set the charge threshold does not provide a
+way to specify a battery, so we assume it is the first/primary
+battery (by checking if the name is BAT0).
+On some newer ASUS laptops (Zenbook UM431DA) though, the
+primary/first battery isn't named BAT0 but BATT, so we need
+to support that case.
+
+Fixes: 7973353e92ee ("platform/x86: asus-wmi: Refactor charge threshold to use the battery hooking API")
+Cc: stable@vger.kernel.org
+Signed-off-by: Kristian Klausen
+Signed-off-by: Andy Shevchenko
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/platform/x86/asus-wmi.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/platform/x86/asus-wmi.c
++++ b/drivers/platform/x86/asus-wmi.c
+@@ -426,8 +426,11 @@ static int asus_wmi_battery_add(struct p
+ {
+ 	/* The WMI method does not provide a way to specific a battery, so we
+ 	 * just assume it is the first battery.
++	 * Note: On some newer ASUS laptops (Zenbook UM431DA), the primary/first
++	 * battery is named BATT.
+ 	 */
+-	if (strcmp(battery->desc->name, "BAT0") != 0)
++	if (strcmp(battery->desc->name, "BAT0") != 0 &&
++	    strcmp(battery->desc->name, "BATT") != 0)
+ 		return -ENODEV;
+ 
+ 	if (device_create_file(&battery->dev,
diff --git a/queue-5.6/remoteproc-fix-null-pointer-dereference-in-rproc_virtio_notify.patch b/queue-5.6/remoteproc-fix-null-pointer-dereference-in-rproc_virtio_notify.patch
new file mode 100644
index 00000000000..a2f629086a0
--- /dev/null
+++ b/queue-5.6/remoteproc-fix-null-pointer-dereference-in-rproc_virtio_notify.patch
@@ -0,0 +1,122 @@
+From 791c13b709dd51eb37330f2a5837434e90c87c27 Mon Sep 17 00:00:00 2001
+From: Nikita Shubin
+Date: Fri, 6 Mar 2020 10:24:53 +0300
+Subject: remoteproc: Fix NULL pointer dereference in rproc_virtio_notify
+
+From: Nikita Shubin
+
+commit 791c13b709dd51eb37330f2a5837434e90c87c27 upstream.
+
+Undefined rproc_ops .kick method in remoteproc driver will result in
+"Unable to handle kernel NULL pointer dereference" in rproc_virtio_notify,
+after firmware loading if:
+
+ 1) .kick method wasn't defined in driver
+ 2) resource_table exists in firmware and has "Virtio device entry" defined
+
+Let's refuse to register an rproc-induced virtio device if no kick method was
+defined for rproc.
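
[ Editor's note: for driver authors who hit the new -EINVAL, the cure is to
  supply a kick handler in the ops table; a minimal hypothetical sketch
  (the my_* names are invented for illustration):

	static void my_rproc_kick(struct rproc *rproc, int vqid)
	{
		/* ring a doorbell/mailbox so the remote side
		 * processes virtqueue @vqid
		 */
	}

	static const struct rproc_ops my_rproc_ops = {
		.start = my_rproc_start,
		.stop  = my_rproc_stop,
		.kick  = my_rproc_kick,	/* required once the firmware's
					 * resource table has vdev entries
					 */
	};

  The oops below is what this looks like today when .kick is left NULL. ]
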
+ +[ 13.180049][ T415] 8<--- cut here --- +[ 13.190558][ T415] Unable to handle kernel NULL pointer dereference at virtual address 00000000 +[ 13.212544][ T415] pgd = (ptrval) +[ 13.217052][ T415] [00000000] *pgd=00000000 +[ 13.224692][ T415] Internal error: Oops: 80000005 [#1] PREEMPT SMP ARM +[ 13.231318][ T415] Modules linked in: rpmsg_char imx_rproc virtio_rpmsg_bus rpmsg_core [last unloaded: imx_rproc] +[ 13.241687][ T415] CPU: 0 PID: 415 Comm: unload-load.sh Not tainted 5.5.2-00002-g707df13bbbdd #6 +[ 13.250561][ T415] Hardware name: Freescale i.MX7 Dual (Device Tree) +[ 13.257009][ T415] PC is at 0x0 +[ 13.260249][ T415] LR is at rproc_virtio_notify+0x2c/0x54 +[ 13.265738][ T415] pc : [<00000000>] lr : [<8050f6b0>] psr: 60010113 +[ 13.272702][ T415] sp : b8d47c48 ip : 00000001 fp : bc04de00 +[ 13.278625][ T415] r10: bc04c000 r9 : 00000cc0 r8 : b8d46000 +[ 13.284548][ T415] r7 : 00000000 r6 : b898f200 r5 : 00000000 r4 : b8a29800 +[ 13.291773][ T415] r3 : 00000000 r2 : 990a3ad4 r1 : 00000000 r0 : b8a29800 +[ 13.299000][ T415] Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none +[ 13.306833][ T415] Control: 10c5387d Table: b8b4806a DAC: 00000051 +[ 13.313278][ T415] Process unload-load.sh (pid: 415, stack limit = 0x(ptrval)) +[ 13.320591][ T415] Stack: (0xb8d47c48 to 0xb8d48000) +[ 13.325651][ T415] 7c40: b895b680 00000001 b898f200 803c6430 b895bc80 7f00ae18 +[ 13.334531][ T415] 7c60: 00000035 00000000 00000000 b9393200 80b3ed80 00004000 b9393268 bbf5a9a2 +[ 13.343410][ T415] 7c80: 00000e00 00000200 00000000 7f00aff0 7f00a014 b895b680 b895b800 990a3ad4 +[ 13.352290][ T415] 7ca0: 00000001 b898f210 b898f200 00000000 00000000 7f00e000 00000001 00000000 +[ 13.361170][ T415] 7cc0: 00000000 803c62e0 80b2169c 802a0924 b898f210 00000000 00000000 b898f210 +[ 13.370049][ T415] 7ce0: 80b9ba44 00000000 80b9ba48 00000000 7f00e000 00000008 80b2169c 80400114 +[ 13.378929][ T415] 7d00: 80b2169c 8061fd64 b898f210 7f00e000 80400744 b8d46000 80b21634 80b21634 +[ 13.387809][ T415] 7d20: 80b2169c 80400614 80b21634 80400718 7f00e000 00000000 b8d47d7c 80400744 +[ 13.396689][ T415] 7d40: b8d46000 80b21634 80b21634 803fe338 b898f254 b80fe76c b8d32e38 990a3ad4 +[ 13.405569][ T415] 7d60: fffffff3 b898f210 b8d46000 00000001 b898f254 803ffe7c 80857a90 b898f210 +[ 13.414449][ T415] 7d80: 00000001 990a3ad4 b8d46000 b898f210 b898f210 80b17aec b8a29c20 803ff0a4 +[ 13.423328][ T415] 7da0: b898f210 00000000 b8d46000 803fb8e0 b898f200 00000000 80b17aec b898f210 +[ 13.432209][ T415] 7dc0: b8a29c20 990a3ad4 b895b900 b898f200 8050fb7c 80b17aec b898f210 b8a29c20 +[ 13.441088][ T415] 7de0: b8a29800 b895b900 b8a29a04 803c5ec0 b8a29c00 b898f200 b8a29a20 00000007 +[ 13.449968][ T415] 7e00: b8a29c20 8050fd78 b8a29800 00000000 b8a29a20 b8a29c04 b8a29820 b8a299d0 +[ 13.458848][ T415] 7e20: b895b900 8050e5a4 b8a29800 b8a299d8 b8d46000 b8a299e0 b8a29820 b8a299d0 +[ 13.467728][ T415] 7e40: b895b900 8050e008 000041ed 00000000 b8b8c440 b8a299d8 b8a299e0 b8a299d8 +[ 13.476608][ T415] 7e60: b8b8c440 990a3ad4 00000000 b8a29820 b8b8c400 00000006 b8a29800 b895b880 +[ 13.485487][ T415] 7e80: b8d47f78 00000000 00000000 8050f4b4 00000006 b895b890 b8b8c400 008fbea0 +[ 13.494367][ T415] 7ea0: b895b880 8029f530 00000000 00000000 b8d46000 00000006 b8d46000 008fbea0 +[ 13.503246][ T415] 7ec0: 8029f434 00000000 b8d46000 00000000 00000000 8021e2e4 0000000a 8061fd0c +[ 13.512125][ T415] 7ee0: 0000000a b8af0c00 0000000a b8af0c40 00000001 b8af0c40 00000000 8061f910 +[ 13.521005][ T415] 7f00: 0000000a 80240af4 00000002 b8d46000 00000000 8061fd0c 
00000002 80232d7c +[ 13.529884][ T415] 7f20: 00000000 b8d46000 00000000 990a3ad4 00000000 00000006 b8a62d80 008fbea0 +[ 13.538764][ T415] 7f40: b8d47f78 00000000 b8d46000 00000000 00000000 802210c0 b88f2900 00000000 +[ 13.547644][ T415] 7f60: b8a62d80 b8a62d80 b8d46000 00000006 008fbea0 80221320 00000000 00000000 +[ 13.556524][ T415] 7f80: b8af0c00 990a3ad4 0000006c 008fbea0 76f1cda0 00000004 80101204 00000004 +[ 13.565403][ T415] 7fa0: 00000000 80101000 0000006c 008fbea0 00000001 008fbea0 00000006 00000000 +[ 13.574283][ T415] 7fc0: 0000006c 008fbea0 76f1cda0 00000004 00000006 00000006 00000000 00000000 +[ 13.583162][ T415] 7fe0: 00000004 7ebaf7d0 76eb4c0b 76e3f206 600d0030 00000001 00000000 00000000 +[ 13.592056][ T415] [<8050f6b0>] (rproc_virtio_notify) from [<803c6430>] (virtqueue_notify+0x1c/0x34) +[ 13.601298][ T415] [<803c6430>] (virtqueue_notify) from [<7f00ae18>] (rpmsg_probe+0x280/0x380 [virtio_rpmsg_bus]) +[ 13.611663][ T415] [<7f00ae18>] (rpmsg_probe [virtio_rpmsg_bus]) from [<803c62e0>] (virtio_dev_probe+0x1f8/0x2c4) +[ 13.622022][ T415] [<803c62e0>] (virtio_dev_probe) from [<80400114>] (really_probe+0x200/0x450) +[ 13.630817][ T415] [<80400114>] (really_probe) from [<80400614>] (driver_probe_device+0x16c/0x1ac) +[ 13.639873][ T415] [<80400614>] (driver_probe_device) from [<803fe338>] (bus_for_each_drv+0x84/0xc8) +[ 13.649102][ T415] [<803fe338>] (bus_for_each_drv) from [<803ffe7c>] (__device_attach+0xd4/0x164) +[ 13.658069][ T415] [<803ffe7c>] (__device_attach) from [<803ff0a4>] (bus_probe_device+0x84/0x8c) +[ 13.666950][ T415] [<803ff0a4>] (bus_probe_device) from [<803fb8e0>] (device_add+0x444/0x768) +[ 13.675572][ T415] [<803fb8e0>] (device_add) from [<803c5ec0>] (register_virtio_device+0xa4/0xfc) +[ 13.684541][ T415] [<803c5ec0>] (register_virtio_device) from [<8050fd78>] (rproc_add_virtio_dev+0xcc/0x1b8) +[ 13.694466][ T415] [<8050fd78>] (rproc_add_virtio_dev) from [<8050e5a4>] (rproc_start+0x148/0x200) +[ 13.703521][ T415] [<8050e5a4>] (rproc_start) from [<8050e008>] (rproc_boot+0x384/0x5c0) +[ 13.711708][ T415] [<8050e008>] (rproc_boot) from [<8050f4b4>] (state_store+0x3c/0xc8) +[ 13.719723][ T415] [<8050f4b4>] (state_store) from [<8029f530>] (kernfs_fop_write+0xfc/0x214) +[ 13.728348][ T415] [<8029f530>] (kernfs_fop_write) from [<8021e2e4>] (__vfs_write+0x30/0x1cc) +[ 13.736971][ T415] [<8021e2e4>] (__vfs_write) from [<802210c0>] (vfs_write+0xac/0x17c) +[ 13.744985][ T415] [<802210c0>] (vfs_write) from [<80221320>] (ksys_write+0x64/0xe4) +[ 13.752825][ T415] [<80221320>] (ksys_write) from [<80101000>] (ret_fast_syscall+0x0/0x54) +[ 13.761178][ T415] Exception stack(0xb8d47fa8 to 0xb8d47ff0) +[ 13.766932][ T415] 7fa0: 0000006c 008fbea0 00000001 008fbea0 00000006 00000000 +[ 13.775811][ T415] 7fc0: 0000006c 008fbea0 76f1cda0 00000004 00000006 00000006 00000000 00000000 +[ 13.784687][ T415] 7fe0: 00000004 7ebaf7d0 76eb4c0b 76e3f206 +[ 13.790442][ T415] Code: bad PC value +[ 13.839214][ T415] ---[ end trace 1fe21ecfc9f28852 ]--- + +Reviewed-by: Mathieu Poirier +Signed-off-by: Nikita Shubin +Fixes: 7a186941626d ("remoteproc: remove the single rpmsg vdev limitation") +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20200306072452.24743-1-NShubin@topcon.com +Signed-off-by: Bjorn Andersson +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/remoteproc/remoteproc_virtio.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/drivers/remoteproc/remoteproc_virtio.c ++++ b/drivers/remoteproc/remoteproc_virtio.c +@@ -334,6 +334,13 @@ int rproc_add_virtio_dev(struct 
rproc_vd + struct rproc_mem_entry *mem; + int ret; + ++ if (rproc->ops->kick == NULL) { ++ ret = -EINVAL; ++ dev_err(dev, ".kick method not defined for %s", ++ rproc->name); ++ goto out; ++ } ++ + /* Try to find dedicated vdev buffer carveout */ + mem = rproc_find_carveout_by_name(rproc, "vdev%dbuffer", rvdev->index); + if (mem) { diff --git a/queue-5.6/remoteproc-qcom_q6v5_mss-don-t-reassign-mpss-region-on-shutdown.patch b/queue-5.6/remoteproc-qcom_q6v5_mss-don-t-reassign-mpss-region-on-shutdown.patch new file mode 100644 index 00000000000..b49fc19826a --- /dev/null +++ b/queue-5.6/remoteproc-qcom_q6v5_mss-don-t-reassign-mpss-region-on-shutdown.patch @@ -0,0 +1,100 @@ +From 900fc60df22748dbc28e4970838e8f7b8f1013ce Mon Sep 17 00:00:00 2001 +From: Bjorn Andersson +Date: Thu, 5 Mar 2020 01:17:27 +0530 +Subject: remoteproc: qcom_q6v5_mss: Don't reassign mpss region on shutdown + +From: Bjorn Andersson + +commit 900fc60df22748dbc28e4970838e8f7b8f1013ce upstream. + +Trying to reclaim mpss memory while the mba is not running causes the +system to crash on devices with security fuses blown, so leave it +assigned to the remote on shutdown and recover it on a subsequent boot. + +Fixes: 6c5a9dc2481b ("remoteproc: qcom: Make secure world call for mem ownership switch") +Cc: stable@vger.kernel.org +Signed-off-by: Bjorn Andersson +Signed-off-by: Sibi Sankar +Tested-by: Bjorn Andersson +Link: https://lore.kernel.org/r/20200304194729.27979-2-sibis@codeaurora.org +Signed-off-by: Bjorn Andersson +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/remoteproc/qcom_q6v5_mss.c | 35 ++++++++++++++++++++++++----------- + 1 file changed, 24 insertions(+), 11 deletions(-) + +--- a/drivers/remoteproc/qcom_q6v5_mss.c ++++ b/drivers/remoteproc/qcom_q6v5_mss.c +@@ -1001,11 +1001,6 @@ static void q6v5_mba_reclaim(struct q6v5 + writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG); + } + +- ret = q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm, +- false, qproc->mpss_phys, +- qproc->mpss_size); +- WARN_ON(ret); +- + q6v5_reset_assert(qproc); + + q6v5_clk_disable(qproc->dev, qproc->reset_clks, +@@ -1095,6 +1090,14 @@ static int q6v5_mpss_load(struct q6v5 *q + max_addr = ALIGN(phdr->p_paddr + phdr->p_memsz, SZ_4K); + } + ++ /** ++ * In case of a modem subsystem restart on secure devices, the modem ++ * memory can be reclaimed only after MBA is loaded. For modem cold ++ * boot this will be a nop ++ */ ++ q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm, false, ++ qproc->mpss_phys, qproc->mpss_size); ++ + mpss_reloc = relocate ? 
min_addr : qproc->mpss_phys; + qproc->mpss_reloc = mpss_reloc; + /* Load firmware segments */ +@@ -1184,8 +1187,16 @@ static void qcom_q6v5_dump_segment(struc + void *ptr = rproc_da_to_va(rproc, segment->da, segment->size); + + /* Unlock mba before copying segments */ +- if (!qproc->dump_mba_loaded) ++ if (!qproc->dump_mba_loaded) { + ret = q6v5_mba_load(qproc); ++ if (!ret) { ++ /* Reset ownership back to Linux to copy segments */ ++ ret = q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm, ++ false, ++ qproc->mpss_phys, ++ qproc->mpss_size); ++ } ++ } + + if (!ptr || ret) + memset(dest, 0xff, segment->size); +@@ -1196,8 +1207,14 @@ static void qcom_q6v5_dump_segment(struc + + /* Reclaim mba after copying segments */ + if (qproc->dump_segment_mask == qproc->dump_complete_mask) { +- if (qproc->dump_mba_loaded) ++ if (qproc->dump_mba_loaded) { ++ /* Try to reset ownership back to Q6 */ ++ q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm, ++ true, ++ qproc->mpss_phys, ++ qproc->mpss_size); + q6v5_mba_reclaim(qproc); ++ } + } + } + +@@ -1237,10 +1254,6 @@ static int q6v5_start(struct rproc *rpro + return 0; + + reclaim_mpss: +- xfermemop_ret = q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm, +- false, qproc->mpss_phys, +- qproc->mpss_size); +- WARN_ON(xfermemop_ret); + q6v5_mba_reclaim(qproc); + + return ret; diff --git a/queue-5.6/remoteproc-qcom_q6v5_mss-reload-the-mba-region-on-coredump.patch b/queue-5.6/remoteproc-qcom_q6v5_mss-reload-the-mba-region-on-coredump.patch new file mode 100644 index 00000000000..bdcda123dc8 --- /dev/null +++ b/queue-5.6/remoteproc-qcom_q6v5_mss-reload-the-mba-region-on-coredump.patch @@ -0,0 +1,67 @@ +From d96f2571dc84d128cacf1944f4ecc87834c779a6 Mon Sep 17 00:00:00 2001 +From: Sibi Sankar +Date: Thu, 5 Mar 2020 01:17:29 +0530 +Subject: remoteproc: qcom_q6v5_mss: Reload the mba region on coredump + +From: Sibi Sankar + +commit d96f2571dc84d128cacf1944f4ecc87834c779a6 upstream. + +On secure devices after a wdog/fatal interrupt, the mba region has to be +refreshed in order to prevent the following errors during mba load. + +Err Logs: +remoteproc remoteproc2: stopped remote processor 4080000.remoteproc +qcom-q6v5-mss 4080000.remoteproc: PBL returned unexpected status -284031232 +qcom-q6v5-mss 4080000.remoteproc: PBL returned unexpected status -284031232 +.... 
+qcom-q6v5-mss 4080000.remoteproc: PBL returned unexpected status -284031232 +qcom-q6v5-mss 4080000.remoteproc: MBA booted, loading mpss + +Fixes: 7dd8ade24dc2a ("remoteproc: qcom: q6v5-mss: Add custom dump function for modem") +Cc: stable@vger.kernel.org +Signed-off-by: Sibi Sankar +Tested-by: Bjorn Andersson +Link: https://lore.kernel.org/r/20200304194729.27979-4-sibis@codeaurora.org +Signed-off-by: Bjorn Andersson +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/remoteproc/qcom_q6v5_mss.c | 19 ++++++++++++++++++- + 1 file changed, 18 insertions(+), 1 deletion(-) + +--- a/drivers/remoteproc/qcom_q6v5_mss.c ++++ b/drivers/remoteproc/qcom_q6v5_mss.c +@@ -1030,6 +1030,23 @@ static void q6v5_mba_reclaim(struct q6v5 + } + } + ++static int q6v5_reload_mba(struct rproc *rproc) ++{ ++ struct q6v5 *qproc = rproc->priv; ++ const struct firmware *fw; ++ int ret; ++ ++ ret = request_firmware(&fw, rproc->firmware, qproc->dev); ++ if (ret < 0) ++ return ret; ++ ++ q6v5_load(rproc, fw); ++ ret = q6v5_mba_load(qproc); ++ release_firmware(fw); ++ ++ return ret; ++} ++ + static int q6v5_mpss_load(struct q6v5 *qproc) + { + const struct elf32_phdr *phdrs; +@@ -1188,7 +1205,7 @@ static void qcom_q6v5_dump_segment(struc + + /* Unlock mba before copying segments */ + if (!qproc->dump_mba_loaded) { +- ret = q6v5_mba_load(qproc); ++ ret = q6v5_reload_mba(rproc); + if (!ret) { + /* Reset ownership back to Linux to copy segments */ + ret = q6v5_xfer_mem_ownership(qproc, &qproc->mpss_perm, diff --git a/queue-5.6/series b/queue-5.6/series index c8d19b7a7d8..b3452b966de 100644 --- a/queue-5.6/series +++ b/queue-5.6/series @@ -127,3 +127,35 @@ x86-tsc_msr-use-named-struct-initializers.patch x86-tsc_msr-fix-msr_fsb_freq-mask-for-cherry-trail-devices.patch x86-tsc_msr-make-msr-derived-tsc-frequency-more-accurate.patch x86-entry-32-add-missing-asm_clac-to-general_protection-entry.patch +platform-x86-asus-wmi-support-laptops-where-the-first-battery-is-named-batt.patch +kvm-ppc-book3s-hv-skip-kvmppc_uvmem_free-if-ultravisor-is-not-supported.patch +kvm-nvmx-properly-handle-userspace-interrupt-window-request.patch +kvm-s390-vsie-fix-region-1-asce-sanity-shadow-address-checks.patch +kvm-s390-vsie-fix-delivery-of-addressing-exceptions.patch +kvm-x86-allocate-new-rmap-and-large-page-tracking-when-moving-memslot.patch +kvm-vmx-always-vmclear-in-use-vmcses-during-crash-with-kexec-support.patch +kvm-x86-gracefully-handle-__vmalloc-failure-during-vm-allocation.patch +kvm-vmx-add-a-trampoline-to-fix-vmread-error-handling.patch +kvm-vmx-fix-crash-cleanup-when-kvm-wasn-t-used.patch +smb3-fix-performance-regression-with-setting-mtime.patch +cifs-fix-bug-which-the-return-value-by-asynchronous-read-is-error.patch +cifs-check-new-file-size-when-extending-file-by-fallocate.patch +mtd-spinand-stop-using-spinand-oobbuf-for-buffering-bad-block-markers.patch +mtd-spinand-do-not-erase-the-block-before-writing-a-bad-block-marker.patch +mtd-rawnand-cadence-fix-the-calculation-of-the-avaialble-oob-size.patch +mtd-rawnand-cadence-change-bad-block-marker-size.patch +mtd-rawnand-cadence-reinit-completion-before-executing-a-new-command.patch +drm-i915-gen12-disable-preemption-timeout.patch +btrfs-don-t-submit-any-btree-write-bio-if-the-fs-has-errors.patch +btrfs-fix-btrfs_calc_reclaim_metadata_size-calculation.patch +btrfs-fix-crash-during-unmount-due-to-race-with-delayed-inode-workers.patch +btrfs-reloc-clean-dirty-subvols-if-we-fail-to-start-a-transaction.patch +btrfs-set-update-the-uuid-generation-as-soon-as-possible.patch 
+btrfs-drop-block-from-cache-on-error-in-relocation.patch
+btrfs-fix-missing-file-extent-item-for-hole-after-ranged-fsync.patch
+btrfs-unset-reloc-control-if-we-fail-to-recover.patch
+btrfs-fix-missing-semaphore-unlock-in-btrfs_sync_file.patch
+btrfs-use-nofs-allocations-for-running-delayed-items.patch
+remoteproc-qcom_q6v5_mss-don-t-reassign-mpss-region-on-shutdown.patch
+remoteproc-qcom_q6v5_mss-reload-the-mba-region-on-coredump.patch
+remoteproc-fix-null-pointer-dereference-in-rproc_virtio_notify.patch
diff --git a/queue-5.6/smb3-fix-performance-regression-with-setting-mtime.patch b/queue-5.6/smb3-fix-performance-regression-with-setting-mtime.patch
new file mode 100644
index 00000000000..01a09968ca5
--- /dev/null
+++ b/queue-5.6/smb3-fix-performance-regression-with-setting-mtime.patch
@@ -0,0 +1,65 @@
+From cf5371ae460eb8e484e4884747af270c86c3c469 Mon Sep 17 00:00:00 2001
+From: Steve French
+Date: Mon, 24 Feb 2020 17:37:39 -0600
+Subject: smb3: fix performance regression with setting mtime
+
+From: Steve French
+
+commit cf5371ae460eb8e484e4884747af270c86c3c469 upstream.
+
+There are cases when we don't want to send the SMB2 flush operation
+(e.g. when the user specifies mount parm "nostrictsync"), and it can
+be a very expensive operation on the server. In most cases, in order
+to set mtime, we simply need to flush (write) the dirty pages from
+the client and send the writes to the server, not also send a flush
+protocol operation to the server.
+
+Fixes: aa081859b10c ("cifs: flush before set-info if we have writeable handles")
+CC: Stable
+Signed-off-by: Steve French
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/cifs/inode.c |   23 ++++++++++++-----------
+ 1 file changed, 12 insertions(+), 11 deletions(-)
+
+--- a/fs/cifs/inode.c
++++ b/fs/cifs/inode.c
+@@ -2516,25 +2516,26 @@ cifs_setattr_nounix(struct dentry *diren
+ 
+ 	/*
+ 	 * Attempt to flush data before changing attributes. We need to do
+-	 * this for ATTR_SIZE and ATTR_MTIME for sure, and if we change the
+-	 * ownership or mode then we may also need to do this. Here, we take
+-	 * the safe way out and just do the flush on all setattr requests. If
+-	 * the flush returns error, store it to report later and continue.
++	 * this for ATTR_SIZE and ATTR_MTIME.  If the flush of the data
++	 * returns error, store it to report later and continue.
+ 	 *
+ 	 * BB: This should be smarter. Why bother flushing pages that
+ 	 * will be truncated anyway? Also, should we error out here if
+-	 * the flush returns error?
++	 * the flush returns error? Do we need to check for ATTR_MTIME_SET flag?
+ 	 */
+-	rc = filemap_write_and_wait(inode->i_mapping);
+-	if (is_interrupt_error(rc)) {
+-		rc = -ERESTARTSYS;
+-		goto cifs_setattr_exit;
++	if (attrs->ia_valid & (ATTR_MTIME | ATTR_SIZE | ATTR_CTIME)) {
++		rc = filemap_write_and_wait(inode->i_mapping);
++		if (is_interrupt_error(rc)) {
++			rc = -ERESTARTSYS;
++			goto cifs_setattr_exit;
++		}
++		mapping_set_error(inode->i_mapping, rc);
+ 	}
+ 
+-	mapping_set_error(inode->i_mapping, rc);
+ 	rc = 0;
+ 
+-	if (attrs->ia_valid & ATTR_MTIME) {
++	if ((attrs->ia_valid & ATTR_MTIME) &&
++	    !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
+ 		rc = cifs_get_writable_file(cifsInode, FIND_WR_ANY, &wfile);
+ 		if (!rc) {
+ 			tcon = tlink_tcon(wfile->tlink);
-- 
2.47.3