From: Greg Kroah-Hartman Date: Thu, 27 Feb 2020 09:23:57 +0000 (+0100) Subject: 5.5-stable patches X-Git-Tag: v4.4.215~35 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=15a1438854ef48ae506c7b192f53e5ebd8f78bae;p=thirdparty%2Fkernel%2Fstable-queue.git 5.5-stable patches added patches: btrfs-destroy-qgroup-extent-records-on-transaction-abort.patch btrfs-do-not-check-delayed-items-are-empty-for-single-transaction-cleanup.patch btrfs-fix-btrfs_wait_ordered_range-so-that-it-waits-for-all-ordered-extents.patch btrfs-fix-bytes_may_use-underflow-in-prealloc-error-condtition.patch btrfs-fix-deadlock-during-fast-fsync-when-logging-prealloc-extents-beyond-eof.patch btrfs-reset-fs_root-to-null-on-error-in-open_ctree.patch crypto-chacha20poly1305-prevent-integer-overflow-on-large-input.patch kvm-apic-avoid-calculating-pending-eoi-from-an-uninitialized-val.patch kvm-nvmx-clear-pin_based_posted_intr-from-nested-pinbased_ctls-only-when-apicv-is-globally-disabled.patch kvm-nvmx-handle-nested-posted-interrupts-when-apicv-is-disabled-for-l1.patch --- diff --git a/queue-5.5/btrfs-destroy-qgroup-extent-records-on-transaction-abort.patch b/queue-5.5/btrfs-destroy-qgroup-extent-records-on-transaction-abort.patch new file mode 100644 index 00000000000..eca49f92d51 --- /dev/null +++ b/queue-5.5/btrfs-destroy-qgroup-extent-records-on-transaction-abort.patch @@ -0,0 +1,82 @@ +From 81f7eb00ff5bb8326e82503a32809421d14abb8a Mon Sep 17 00:00:00 2001 +From: Jeff Mahoney +Date: Tue, 11 Feb 2020 15:25:37 +0800 +Subject: btrfs: destroy qgroup extent records on transaction abort + +From: Jeff Mahoney + +commit 81f7eb00ff5bb8326e82503a32809421d14abb8a upstream. + +We clean up the delayed references when we abort a transaction but we +leave the pending qgroup extent records behind, leaking memory. + +This patch destroys the extent records when we destroy the delayed refs +and makes sure they're gone before releasing the transaction. 
+ +Fixes: 3368d001ba5d ("btrfs: qgroup: Record possible quota-related extent for qgroup.") +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Josef Bacik +Signed-off-by: Jeff Mahoney +[ Rebased to latest upstream, remove to_qgroup() helper, use + rbtree_postorder_for_each_entry_safe() wrapper ] +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/disk-io.c | 1 + + fs/btrfs/qgroup.c | 13 +++++++++++++ + fs/btrfs/qgroup.h | 1 + + fs/btrfs/transaction.c | 2 ++ + 4 files changed, 17 insertions(+) + +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -4272,6 +4272,7 @@ static int btrfs_destroy_delayed_refs(st + cond_resched(); + spin_lock(&delayed_refs->lock); + } ++ btrfs_qgroup_destroy_extent_records(trans); + + spin_unlock(&delayed_refs->lock); + +--- a/fs/btrfs/qgroup.c ++++ b/fs/btrfs/qgroup.c +@@ -4016,3 +4016,16 @@ out: + } + return ret; + } ++ ++void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans) ++{ ++ struct btrfs_qgroup_extent_record *entry; ++ struct btrfs_qgroup_extent_record *next; ++ struct rb_root *root; ++ ++ root = &trans->delayed_refs.dirty_extent_root; ++ rbtree_postorder_for_each_entry_safe(entry, next, root, node) { ++ ulist_free(entry->old_roots); ++ kfree(entry); ++ } ++} +--- a/fs/btrfs/qgroup.h ++++ b/fs/btrfs/qgroup.h +@@ -414,5 +414,6 @@ int btrfs_qgroup_add_swapped_blocks(stru + u64 last_snapshot); + int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *eb); ++void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans); + + #endif +--- a/fs/btrfs/transaction.c ++++ b/fs/btrfs/transaction.c +@@ -121,6 +121,8 @@ void btrfs_put_transaction(struct btrfs_ + BUG_ON(!list_empty(&transaction->list)); + WARN_ON(!RB_EMPTY_ROOT( + &transaction->delayed_refs.href_root.rb_root)); ++ WARN_ON(!RB_EMPTY_ROOT( ++ &transaction->delayed_refs.dirty_extent_root)); + if 
(transaction->delayed_refs.pending_csums) + btrfs_err(transaction->fs_info, + "pending csums is %llu", diff --git a/queue-5.5/btrfs-do-not-check-delayed-items-are-empty-for-single-transaction-cleanup.patch b/queue-5.5/btrfs-do-not-check-delayed-items-are-empty-for-single-transaction-cleanup.patch new file mode 100644 index 00000000000..b67c37af47a --- /dev/null +++ b/queue-5.5/btrfs-do-not-check-delayed-items-are-empty-for-single-transaction-cleanup.patch @@ -0,0 +1,41 @@ +From 1e90315149f3fe148e114a5de86f0196d1c21fa5 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Thu, 13 Feb 2020 10:47:29 -0500 +Subject: btrfs: do not check delayed items are empty for single transaction cleanup + +From: Josef Bacik + +commit 1e90315149f3fe148e114a5de86f0196d1c21fa5 upstream. + +btrfs_assert_delayed_root_empty() will check if the delayed root is +completely empty, but this is a filesystem-wide check. On cleanup we +may have allowed other transactions to begin, for whatever reason, and +thus the delayed root is not empty. + +So remove this check from btrfs_cleanup_one_transaction(). This however can +stay in btrfs_cleanup_transaction(), because it checks only after all of +the transactions have been properly cleaned up, and thus is valid. 
+ +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Johannes Thumshirn +Reviewed-by: Nikolay Borisov +Reviewed-by: Qu Wenruo +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/disk-io.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -4499,7 +4499,6 @@ void btrfs_cleanup_one_transaction(struc + wake_up(&fs_info->transaction_wait); + + btrfs_destroy_delayed_inodes(fs_info); +- btrfs_assert_delayed_root_empty(fs_info); + + btrfs_destroy_marked_extents(fs_info, &cur_trans->dirty_pages, + EXTENT_DIRTY); diff --git a/queue-5.5/btrfs-fix-btrfs_wait_ordered_range-so-that-it-waits-for-all-ordered-extents.patch b/queue-5.5/btrfs-fix-btrfs_wait_ordered_range-so-that-it-waits-for-all-ordered-extents.patch new file mode 100644 index 00000000000..64ead78a226 --- /dev/null +++ b/queue-5.5/btrfs-fix-btrfs_wait_ordered_range-so-that-it-waits-for-all-ordered-extents.patch @@ -0,0 +1,59 @@ +From e75fd33b3f744f644061a4f9662bd63f5434f806 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Thu, 13 Feb 2020 12:29:50 +0000 +Subject: Btrfs: fix btrfs_wait_ordered_range() so that it waits for all ordered extents + +From: Filipe Manana + +commit e75fd33b3f744f644061a4f9662bd63f5434f806 upstream. + +In btrfs_wait_ordered_range() once we find an ordered extent that has +finished with an error we exit the loop and don't wait for any other +ordered extents that might be still in progress. + +All the users of btrfs_wait_ordered_range() expect that there are no more +ordered extents in progress after that function returns. So past fixes +such as the ones from the two following commits: + + ff612ba7849964 ("btrfs: fix panic during relocation after ENOSPC before + writeback happens") + + 28aeeac1dd3080 ("Btrfs: fix panic when starting bg cache writeout after + IO error") + +don't work when there are multiple ordered extents in the range. 
+ +Fix that by making btrfs_wait_ordered_range() wait for all ordered extents +even after it finds one that had an error. + +Link: https://github.com/kdave/btrfs-progs/issues/228#issuecomment-569777554 +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Qu Wenruo +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/ordered-data.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/ordered-data.c ++++ b/fs/btrfs/ordered-data.c +@@ -686,10 +686,15 @@ int btrfs_wait_ordered_range(struct inod + } + btrfs_start_ordered_extent(inode, ordered, 1); + end = ordered->file_offset; ++ /* ++ * If the ordered extent had an error save the error but don't ++ * exit without waiting first for all other ordered extents in ++ * the range to complete. ++ */ + if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) + ret = -EIO; + btrfs_put_ordered_extent(ordered); +- if (ret || end == 0 || end == start) ++ if (end == 0 || end == start) + break; + end--; + } diff --git a/queue-5.5/btrfs-fix-bytes_may_use-underflow-in-prealloc-error-condtition.patch b/queue-5.5/btrfs-fix-bytes_may_use-underflow-in-prealloc-error-condtition.patch new file mode 100644 index 00000000000..f1b82641f45 --- /dev/null +++ b/queue-5.5/btrfs-fix-bytes_may_use-underflow-in-prealloc-error-condtition.patch @@ -0,0 +1,96 @@ +From b778cf962d71a0e737923d55d0432f3bd287258e Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Thu, 13 Feb 2020 10:47:31 -0500 +Subject: btrfs: fix bytes_may_use underflow in prealloc error condtition + +From: Josef Bacik + +commit b778cf962d71a0e737923d55d0432f3bd287258e upstream. 
+ +I hit the following warning while running my error injection stress +testing: + + WARNING: CPU: 3 PID: 1453 at fs/btrfs/space-info.h:108 btrfs_free_reserved_data_space_noquota+0xfd/0x160 [btrfs] + RIP: 0010:btrfs_free_reserved_data_space_noquota+0xfd/0x160 [btrfs] + Call Trace: + btrfs_free_reserved_data_space+0x4f/0x70 [btrfs] + __btrfs_prealloc_file_range+0x378/0x470 [btrfs] + elfcorehdr_read+0x40/0x40 + ? elfcorehdr_read+0x40/0x40 + ? btrfs_commit_transaction+0xca/0xa50 [btrfs] + ? dput+0xb4/0x2a0 + ? btrfs_log_dentry_safe+0x55/0x70 [btrfs] + ? btrfs_sync_file+0x30e/0x420 [btrfs] + ? do_fsync+0x38/0x70 + ? __x64_sys_fdatasync+0x13/0x20 + ? do_syscall_64+0x5b/0x1b0 + ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +This happens if we fail to insert our reserved file extent. At this +point we've already converted our reservation from ->bytes_may_use to +->bytes_reserved. However once we break we will attempt to free +everything from [cur_offset, end] from ->bytes_may_use, but our extent +reservation will overlap part of this. + +Fix this problem by adding ins.offset (our extent allocation size) to +cur_offset so we remove the actual remaining part from ->bytes_may_use. + +I validated this fix using my inject-error.py script + +python inject-error.py -o should_fail_bio -t cache_save_setup -t \ + __btrfs_prealloc_file_range \ + -t insert_reserved_file_extent.constprop.0 \ + -r "-5" ./run-fsstress.sh + +where run-fsstress.sh simply mounts and runs fsstress on a disk. 
+ +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Qu Wenruo +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/inode.c | 16 +++++++++++++--- + 1 file changed, 13 insertions(+), 3 deletions(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -10487,6 +10487,7 @@ static int __btrfs_prealloc_file_range(s + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_key ins; + u64 cur_offset = start; ++ u64 clear_offset = start; + u64 i_size; + u64 cur_bytes; + u64 last_alloc = (u64)-1; +@@ -10521,6 +10522,15 @@ static int __btrfs_prealloc_file_range(s + btrfs_end_transaction(trans); + break; + } ++ ++ /* ++ * We've reserved this space, and thus converted it from ++ * ->bytes_may_use to ->bytes_reserved. Any error that happens ++ * from here on out we will only need to clear our reservation ++ * for the remaining unreserved area, so advance our ++ * clear_offset by our extent size. ++ */ ++ clear_offset += ins.offset; + btrfs_dec_block_group_reservations(fs_info, ins.objectid); + + last_alloc = ins.offset; +@@ -10600,9 +10610,9 @@ next: + if (own_trans) + btrfs_end_transaction(trans); + } +- if (cur_offset < end) +- btrfs_free_reserved_data_space(inode, NULL, cur_offset, +- end - cur_offset + 1); ++ if (clear_offset < end) ++ btrfs_free_reserved_data_space(inode, NULL, clear_offset, ++ end - clear_offset + 1); + return ret; + } + diff --git a/queue-5.5/btrfs-fix-deadlock-during-fast-fsync-when-logging-prealloc-extents-beyond-eof.patch b/queue-5.5/btrfs-fix-deadlock-during-fast-fsync-when-logging-prealloc-extents-beyond-eof.patch new file mode 100644 index 00000000000..534d2bcfa74 --- /dev/null +++ b/queue-5.5/btrfs-fix-deadlock-during-fast-fsync-when-logging-prealloc-extents-beyond-eof.patch @@ -0,0 +1,214 @@ +From a5ae50dea9111db63d30d700766dd5509602f7ad Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Thu, 20 Feb 2020 13:29:49 +0000 +Subject: Btrfs: fix deadlock during 
fast fsync when logging prealloc extents beyond eof + +From: Filipe Manana + +commit a5ae50dea9111db63d30d700766dd5509602f7ad upstream. + +While logging the prealloc extents of an inode during a fast fsync we call +btrfs_truncate_inode_items(), through btrfs_log_prealloc_extents(), while +holding a read lock on a leaf of the inode's root (not the log root, the +fs/subvol root), and then that function locks the file range in the inode's +iotree. This can lead to a deadlock when: + +* the fsync is ranged + +* the file has prealloc extents beyond eof + +* writeback for a range different from the fsync range starts + during the fsync + +* the size of the file is not sector size aligned + +Because when finishing an ordered extent we lock first a file range and +then try to COW the fs/subvol tree to insert an extent item. + +The following diagram shows how the deadlock can happen. + + CPU 1 CPU 2 + + btrfs_sync_file() + --> for range [0, 1MiB) + + --> inode has a size of + 1MiB and has 1 prealloc + extent beyond the + i_size, starting at offset + 4MiB + + flushes all delalloc for the + range [0MiB, 1MiB) and waits + for the respective ordered + extents to complete + + --> before task at CPU 1 locks the + inode, a write into file range + [1MiB, 2MiB + 1KiB) is made + + --> i_size is updated to 2MiB + 1KiB + + --> writeback is started for that + range, [1MiB, 2MiB + 4KiB) + --> end offset rounded up to + be sector size aligned + + btrfs_log_dentry_safe() + btrfs_log_inode_parent() + btrfs_log_inode() + + btrfs_log_changed_extents() + btrfs_log_prealloc_extents() + --> does a search on the + inode's root + --> holds a read lock on + leaf X + + btrfs_finish_ordered_io() + --> locks range [1MiB, 2MiB + 4KiB) + --> end offset rounded up + to be sector size aligned + + --> tries to cow leaf X, through + insert_reserved_file_extent() + --> already locked by the + task at CPU 1 + + btrfs_truncate_inode_items() + + --> gets an i_size of + 2MiB + 1KiB, which is + not sector size + 
aligned + + --> tries to lock file + range [2MiB, (u64)-1) + --> the start range + is rounded down + from 2MiB + 1K + to 2MiB to be sector + size aligned + + --> but the subrange + [2MiB, 2MiB + 4KiB) is + already locked by + task at CPU 2 which + is waiting to get a + write lock on leaf X + for which we are + holding a read lock + + *** deadlock *** + +This results in a stack trace like the following, triggered by test case +generic/561 from fstests: + + [ 2779.973608] INFO: task kworker/u8:6:247 blocked for more than 120 seconds. + [ 2779.979536] Not tainted 5.6.0-rc2-btrfs-next-53 #1 + [ 2779.984503] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. + [ 2779.990136] kworker/u8:6 D 0 247 2 0x80004000 + [ 2779.990457] Workqueue: btrfs-endio-write btrfs_work_helper [btrfs] + [ 2779.990466] Call Trace: + [ 2779.990491] ? __schedule+0x384/0xa30 + [ 2779.990521] schedule+0x33/0xe0 + [ 2779.990616] btrfs_tree_read_lock+0x19e/0x2e0 [btrfs] + [ 2779.990632] ? remove_wait_queue+0x60/0x60 + [ 2779.990730] btrfs_read_lock_root_node+0x2f/0x40 [btrfs] + [ 2779.990782] btrfs_search_slot+0x510/0x1000 [btrfs] + [ 2779.990869] btrfs_lookup_file_extent+0x4a/0x70 [btrfs] + [ 2779.990944] __btrfs_drop_extents+0x161/0x1060 [btrfs] + [ 2779.990987] ? mark_held_locks+0x6d/0xc0 + [ 2779.990994] ? __slab_alloc.isra.49+0x99/0x100 + [ 2779.991060] ? insert_reserved_file_extent.constprop.19+0x64/0x300 [btrfs] + [ 2779.991145] insert_reserved_file_extent.constprop.19+0x97/0x300 [btrfs] + [ 2779.991222] ? start_transaction+0xdd/0x5c0 [btrfs] + [ 2779.991291] btrfs_finish_ordered_io+0x4f4/0x840 [btrfs] + [ 2779.991405] btrfs_work_helper+0xaa/0x720 [btrfs] + [ 2779.991432] process_one_work+0x26d/0x6a0 + [ 2779.991460] worker_thread+0x4f/0x3e0 + [ 2779.991481] ? process_one_work+0x6a0/0x6a0 + [ 2779.991489] kthread+0x103/0x140 + [ 2779.991499] ? kthread_create_worker_on_cpu+0x70/0x70 + [ 2779.991515] ret_from_fork+0x3a/0x50 + (...) 
+ [ 2780.026211] INFO: task fsstress:17375 blocked for more than 120 seconds. + [ 2780.027480] Not tainted 5.6.0-rc2-btrfs-next-53 #1 + [ 2780.028482] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. + [ 2780.030035] fsstress D 0 17375 17373 0x00004000 + [ 2780.030038] Call Trace: + [ 2780.030044] ? __schedule+0x384/0xa30 + [ 2780.030052] schedule+0x33/0xe0 + [ 2780.030075] lock_extent_bits+0x20c/0x320 [btrfs] + [ 2780.030094] ? btrfs_truncate_inode_items+0xf4/0x1150 [btrfs] + [ 2780.030098] ? rcu_read_lock_sched_held+0x59/0xa0 + [ 2780.030102] ? remove_wait_queue+0x60/0x60 + [ 2780.030122] btrfs_truncate_inode_items+0x133/0x1150 [btrfs] + [ 2780.030151] ? btrfs_set_path_blocking+0xb2/0x160 [btrfs] + [ 2780.030165] ? btrfs_search_slot+0x379/0x1000 [btrfs] + [ 2780.030195] btrfs_log_changed_extents.isra.8+0x841/0x93e [btrfs] + [ 2780.030202] ? do_raw_spin_unlock+0x49/0xc0 + [ 2780.030215] ? btrfs_get_num_csums+0x10/0x10 [btrfs] + [ 2780.030239] btrfs_log_inode+0xf83/0x1124 [btrfs] + [ 2780.030251] ? __mutex_unlock_slowpath+0x45/0x2a0 + [ 2780.030275] btrfs_log_inode_parent+0x2a0/0xe40 [btrfs] + [ 2780.030282] ? dget_parent+0xa1/0x370 + [ 2780.030309] btrfs_log_dentry_safe+0x4a/0x70 [btrfs] + [ 2780.030329] btrfs_sync_file+0x3f3/0x490 [btrfs] + [ 2780.030339] do_fsync+0x38/0x60 + [ 2780.030343] __x64_sys_fdatasync+0x13/0x20 + [ 2780.030345] do_syscall_64+0x5c/0x280 + [ 2780.030348] entry_SYSCALL_64_after_hwframe+0x49/0xbe + [ 2780.030356] RIP: 0033:0x7f2d80f6d5f0 + [ 2780.030361] Code: Bad RIP value. 
+ [ 2780.030362] RSP: 002b:00007ffdba3c8548 EFLAGS: 00000246 ORIG_RAX: 000000000000004b + [ 2780.030364] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f2d80f6d5f0 + [ 2780.030365] RDX: 00007ffdba3c84b0 RSI: 00007ffdba3c84b0 RDI: 0000000000000003 + [ 2780.030367] RBP: 000000000000004a R08: 0000000000000001 R09: 00007ffdba3c855c + [ 2780.030368] R10: 0000000000000078 R11: 0000000000000246 R12: 00000000000001f4 + [ 2780.030369] R13: 0000000051eb851f R14: 00007ffdba3c85f0 R15: 0000557a49220d90 + +So fix this by making btrfs_truncate_inode_items() not lock the range in +the inode's iotree when the target root is a log root, since it's not +needed to lock the range for log roots as the protection from the inode's +lock and log_mutex are all that's needed. + +Fixes: 28553fa992cb28 ("Btrfs: fix race between shrinking truncate and fiemap") +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/inode.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -4775,8 +4775,9 @@ int btrfs_truncate_inode_items(struct bt + return -ENOMEM; + path->reada = READA_BACK; + +- lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1, +- &cached_state); ++ if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) ++ lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1, ++ &cached_state); + + /* + * We want to drop from the next block forward in case this new size is +@@ -5040,11 +5041,10 @@ out: + if (!ret && last_size > new_size) + last_size = new_size; + btrfs_ordered_update_i_size(inode, last_size, NULL); ++ unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, ++ (u64)-1, &cached_state); + } + +- unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1, +- &cached_state); +- + btrfs_free_path(path); + return ret; + } diff --git 
a/queue-5.5/btrfs-reset-fs_root-to-null-on-error-in-open_ctree.patch b/queue-5.5/btrfs-reset-fs_root-to-null-on-error-in-open_ctree.patch new file mode 100644 index 00000000000..a82666a2988 --- /dev/null +++ b/queue-5.5/btrfs-reset-fs_root-to-null-on-error-in-open_ctree.patch @@ -0,0 +1,37 @@ +From 315bf8ef914f31d51d084af950703aa1e09a728c Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Thu, 13 Feb 2020 10:47:28 -0500 +Subject: btrfs: reset fs_root to NULL on error in open_ctree + +From: Josef Bacik + +commit 315bf8ef914f31d51d084af950703aa1e09a728c upstream. + +While running my error injection script I hit a panic when we tried to +clean up the fs_root when freeing the fs_root. This is because +fs_info->fs_root == ERR_PTR(-EIO), which isn't great. Fix this by +setting fs_info->fs_root = NULL; if we fail to read the root. + +CC: stable@vger.kernel.org # 4.4+ +Reviewed-by: Nikolay Borisov +Reviewed-by: Johannes Thumshirn +Reviewed-by: Qu Wenruo +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/disk-io.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -3200,6 +3200,7 @@ int __cold open_ctree(struct super_block + if (IS_ERR(fs_info->fs_root)) { + err = PTR_ERR(fs_info->fs_root); + btrfs_warn(fs_info, "failed to read fs tree: %d", err); ++ fs_info->fs_root = NULL; + goto fail_qgroup; + } + diff --git a/queue-5.5/crypto-chacha20poly1305-prevent-integer-overflow-on-large-input.patch b/queue-5.5/crypto-chacha20poly1305-prevent-integer-overflow-on-large-input.patch new file mode 100644 index 00000000000..141584eddf7 --- /dev/null +++ b/queue-5.5/crypto-chacha20poly1305-prevent-integer-overflow-on-large-input.patch @@ -0,0 +1,42 @@ +From c9cc0517bba9f0213f1e55172feceb99e5512daf Mon Sep 17 00:00:00 2001 +From: "Jason A. 
Donenfeld" +Date: Thu, 6 Feb 2020 12:42:01 +0100 +Subject: crypto: chacha20poly1305 - prevent integer overflow on large input + +From: Jason A. Donenfeld + +commit c9cc0517bba9f0213f1e55172feceb99e5512daf upstream. + +This code assigns src_len (size_t) to sl (int), which causes problems +when src_len is very large. Probably nobody in the kernel should be +passing this much data to chacha20poly1305 all in one go anyway, so I +don't think we need to change the algorithm or introduce larger types +or anything. But we should at least error out early in this case and +print a warning so that we get reports if this does happen and can look +into why anybody is possibly passing it that much data or if they're +accidentally passing -1 or similar. + +Fixes: d95312a3ccc0 ("crypto: lib/chacha20poly1305 - reimplement crypt_from_sg() routine") +Cc: Ard Biesheuvel +Cc: stable@vger.kernel.org # 5.5+ +Signed-off-by: Jason A. Donenfeld +Acked-by: Ard Biesheuvel +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman + +--- + lib/crypto/chacha20poly1305.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/lib/crypto/chacha20poly1305.c ++++ b/lib/crypto/chacha20poly1305.c +@@ -235,6 +235,9 @@ bool chacha20poly1305_crypt_sg_inplace(s + __le64 lens[2]; + } b __aligned(16); + ++ if (WARN_ON(src_len > INT_MAX)) ++ return false; ++ + chacha_load_key(b.k, key); + + b.iv[0] = 0; diff --git a/queue-5.5/kvm-apic-avoid-calculating-pending-eoi-from-an-uninitialized-val.patch b/queue-5.5/kvm-apic-avoid-calculating-pending-eoi-from-an-uninitialized-val.patch new file mode 100644 index 00000000000..d6de01c72eb --- /dev/null +++ b/queue-5.5/kvm-apic-avoid-calculating-pending-eoi-from-an-uninitialized-val.patch @@ -0,0 +1,38 @@ +From 23520b2def95205f132e167cf5b25c609975e959 Mon Sep 17 00:00:00 2001 +From: Miaohe Lin +Date: Fri, 21 Feb 2020 22:04:46 +0800 +Subject: KVM: apic: avoid calculating pending eoi from an uninitialized val + +From: Miaohe Lin + +commit 
23520b2def95205f132e167cf5b25c609975e959 upstream. + +When pv_eoi_get_user() fails, 'val' may remain uninitialized and the return +value of pv_eoi_get_pending() becomes random. Fix the issue by initializing +the variable. + +Reviewed-by: Vitaly Kuznetsov +Signed-off-by: Miaohe Lin +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/lapic.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/arch/x86/kvm/lapic.c ++++ b/arch/x86/kvm/lapic.c +@@ -630,9 +630,11 @@ static inline bool pv_eoi_enabled(struct + static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) + { + u8 val; +- if (pv_eoi_get_user(vcpu, &val) < 0) ++ if (pv_eoi_get_user(vcpu, &val) < 0) { + printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n", + (unsigned long long)vcpu->arch.pv_eoi.msr_val); ++ return false; ++ } + return val & 0x1; + } + diff --git a/queue-5.5/kvm-nvmx-clear-pin_based_posted_intr-from-nested-pinbased_ctls-only-when-apicv-is-globally-disabled.patch b/queue-5.5/kvm-nvmx-clear-pin_based_posted_intr-from-nested-pinbased_ctls-only-when-apicv-is-globally-disabled.patch new file mode 100644 index 00000000000..122351d347f --- /dev/null +++ b/queue-5.5/kvm-nvmx-clear-pin_based_posted_intr-from-nested-pinbased_ctls-only-when-apicv-is-globally-disabled.patch @@ -0,0 +1,122 @@ +From a4443267800af240072280c44521caab61924e55 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Thu, 20 Feb 2020 18:22:04 +0100 +Subject: KVM: nVMX: clear PIN_BASED_POSTED_INTR from nested pinbased_ctls only when apicv is globally disabled + +From: Vitaly Kuznetsov + +commit a4443267800af240072280c44521caab61924e55 upstream. + +When apicv is disabled on a vCPU (e.g. by enabling KVM_CAP_HYPERV_SYNIC*), +nothing happens to VMX MSRs on the already existing vCPUs, however, all new +ones are created with PIN_BASED_POSTED_INTR filtered out. 
This is very +confusing and results in the following picture inside the guest: + +$ rdmsr -ax 0x48d +ff00000016 +7f00000016 +7f00000016 +7f00000016 + +This is observed with QEMU and 4-vCPU guest: QEMU creates vCPU0, does +KVM_CAP_HYPERV_SYNIC2 and then creates the remaining three. + +L1 hypervisor may only check CPU0's controls to find out what features +are available and it will be very confused later. Switch to setting +PIN_BASED_POSTED_INTR control based on global 'enable_apicv' setting. + +Signed-off-by: Vitaly Kuznetsov +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/vmx/capabilities.h | 1 + + arch/x86/kvm/vmx/nested.c | 5 ++--- + arch/x86/kvm/vmx/nested.h | 3 +-- + arch/x86/kvm/vmx/vmx.c | 10 ++++------ + 4 files changed, 8 insertions(+), 11 deletions(-) + +--- a/arch/x86/kvm/vmx/capabilities.h ++++ b/arch/x86/kvm/vmx/capabilities.h +@@ -12,6 +12,7 @@ extern bool __read_mostly enable_ept; + extern bool __read_mostly enable_unrestricted_guest; + extern bool __read_mostly enable_ept_ad_bits; + extern bool __read_mostly enable_pml; ++extern bool __read_mostly enable_apicv; + extern int __read_mostly pt_mode; + + #define PT_MODE_SYSTEM 0 +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -5979,8 +5979,7 @@ void nested_vmx_set_vmcs_shadowing_bitma + * bit in the high half is on if the corresponding bit in the control field + * may be on. See also vmx_control_verify(). + */ +-void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, +- bool apicv) ++void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) + { + /* + * Note that as a general rule, the high half of the MSRs (bits in +@@ -6007,7 +6006,7 @@ void nested_vmx_setup_ctls_msrs(struct n + PIN_BASED_EXT_INTR_MASK | + PIN_BASED_NMI_EXITING | + PIN_BASED_VIRTUAL_NMIS | +- (apicv ? PIN_BASED_POSTED_INTR : 0); ++ (enable_apicv ? 
PIN_BASED_POSTED_INTR : 0); + msrs->pinbased_ctls_high |= + PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | + PIN_BASED_VMX_PREEMPTION_TIMER; +--- a/arch/x86/kvm/vmx/nested.h ++++ b/arch/x86/kvm/vmx/nested.h +@@ -17,8 +17,7 @@ enum nvmx_vmentry_status { + }; + + void vmx_leave_nested(struct kvm_vcpu *vcpu); +-void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, +- bool apicv); ++void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps); + void nested_vmx_hardware_unsetup(void); + __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)); + void nested_vmx_set_vmcs_shadowing_bitmap(void); +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -95,7 +95,7 @@ module_param(emulate_invalid_guest_state + static bool __read_mostly fasteoi = 1; + module_param(fasteoi, bool, S_IRUGO); + +-static bool __read_mostly enable_apicv = 1; ++bool __read_mostly enable_apicv = 1; + module_param(enable_apicv, bool, S_IRUGO); + + /* +@@ -6803,8 +6803,7 @@ static struct kvm_vcpu *vmx_create_vcpu( + + if (nested) + nested_vmx_setup_ctls_msrs(&vmx->nested.msrs, +- vmx_capability.ept, +- kvm_vcpu_apicv_active(&vmx->vcpu)); ++ vmx_capability.ept); + else + memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs)); + +@@ -6884,8 +6883,7 @@ static int __init vmx_check_processor_co + if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) + return -EIO; + if (nested) +- nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept, +- enable_apicv); ++ nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept); + if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { + printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", + smp_processor_id()); +@@ -7792,7 +7790,7 @@ static __init int hardware_setup(void) + + if (nested) { + nested_vmx_setup_ctls_msrs(&vmcs_config.nested, +- vmx_capability.ept, enable_apicv); ++ vmx_capability.ept); + + r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers); + if (r) diff --git 
a/queue-5.5/kvm-nvmx-handle-nested-posted-interrupts-when-apicv-is-disabled-for-l1.patch b/queue-5.5/kvm-nvmx-handle-nested-posted-interrupts-when-apicv-is-disabled-for-l1.patch new file mode 100644 index 00000000000..bef1893946d --- /dev/null +++ b/queue-5.5/kvm-nvmx-handle-nested-posted-interrupts-when-apicv-is-disabled-for-l1.patch @@ -0,0 +1,113 @@ +From 91a5f413af596ad01097e59bf487eb07cb3f1331 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Thu, 20 Feb 2020 18:22:05 +0100 +Subject: KVM: nVMX: handle nested posted interrupts when apicv is disabled for L1 + +From: Vitaly Kuznetsov + +commit 91a5f413af596ad01097e59bf487eb07cb3f1331 upstream. + +Even when APICv is disabled for L1 it can (and, actually, is) still +available for L2, this means we need to always call +vmx_deliver_nested_posted_interrupt() when attempting an interrupt +delivery. + +Suggested-by: Paolo Bonzini +Signed-off-by: Vitaly Kuznetsov +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/kvm_host.h | 2 +- + arch/x86/kvm/lapic.c | 5 +---- + arch/x86/kvm/svm.c | 7 ++++++- + arch/x86/kvm/vmx/vmx.c | 13 +++++++++---- + 4 files changed, 17 insertions(+), 10 deletions(-) + +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -1115,7 +1115,7 @@ struct kvm_x86_ops { + void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); + void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); + void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); +- void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); ++ int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); + int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); + int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); + int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr); +--- a/arch/x86/kvm/lapic.c ++++ b/arch/x86/kvm/lapic.c +@@ -1049,11 +1049,8 @@ static int __apic_accept_irq(struct kvm_ + 
apic->regs + APIC_TMR); + } + +- if (vcpu->arch.apicv_active) +- kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); +- else { ++ if (kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)) { + kvm_lapic_set_irr(vector, apic); +- + kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_vcpu_kick(vcpu); + } +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -5160,8 +5160,11 @@ static void svm_load_eoi_exitmap(struct + return; + } + +-static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) ++static int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) + { ++ if (!vcpu->arch.apicv_active) ++ return -1; ++ + kvm_lapic_set_irr(vec, vcpu->arch.apic); + smp_mb__after_atomic(); + +@@ -5173,6 +5176,8 @@ static void svm_deliver_avic_intr(struct + put_cpu(); + } else + kvm_vcpu_wake_up(vcpu); ++ ++ return 0; + } + + static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -3848,24 +3848,29 @@ static int vmx_deliver_nested_posted_int + * 2. If target vcpu isn't running(root mode), kick it to pick up the + * interrupt from PIR in next vmentry. + */ +-static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) ++static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) + { + struct vcpu_vmx *vmx = to_vmx(vcpu); + int r; + + r = vmx_deliver_nested_posted_interrupt(vcpu, vector); + if (!r) +- return; ++ return 0; ++ ++ if (!vcpu->arch.apicv_active) ++ return -1; + + if (pi_test_and_set_pir(vector, &vmx->pi_desc)) +- return; ++ return 0; + + /* If a previous notification has sent the IPI, nothing to do. 
*/ + if (pi_test_and_set_on(&vmx->pi_desc)) +- return; ++ return 0; + + if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) + kvm_vcpu_kick(vcpu); ++ ++ return 0; + } + + /* diff --git a/queue-5.5/series b/queue-5.5/series index 4c5b08924f0..70866314736 100644 --- a/queue-5.5/series +++ b/queue-5.5/series @@ -97,3 +97,13 @@ ext4-fix-race-between-writepages-and-enabling-ext4_extents_fl.patch drm-i915-execlists-always-force-a-context-reload-when-rewinding-ring_tail.patch kvm-nvmx-refactor-io-bitmap-checks-into-helper-function.patch kvm-nvmx-check-io-instruction-vm-exit-conditions.patch +kvm-nvmx-clear-pin_based_posted_intr-from-nested-pinbased_ctls-only-when-apicv-is-globally-disabled.patch +kvm-nvmx-handle-nested-posted-interrupts-when-apicv-is-disabled-for-l1.patch +kvm-apic-avoid-calculating-pending-eoi-from-an-uninitialized-val.patch +crypto-chacha20poly1305-prevent-integer-overflow-on-large-input.patch +btrfs-destroy-qgroup-extent-records-on-transaction-abort.patch +btrfs-fix-bytes_may_use-underflow-in-prealloc-error-condtition.patch +btrfs-reset-fs_root-to-null-on-error-in-open_ctree.patch +btrfs-do-not-check-delayed-items-are-empty-for-single-transaction-cleanup.patch +btrfs-fix-btrfs_wait_ordered_range-so-that-it-waits-for-all-ordered-extents.patch +btrfs-fix-deadlock-during-fast-fsync-when-logging-prealloc-extents-beyond-eof.patch