From: Sasha Levin Date: Sun, 9 Feb 2020 22:18:07 +0000 (-0500) Subject: fixes for 5.4 X-Git-Tag: v4.19.103~22^2~4 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a7acc133fd935e8cfdd6a621281e96ffb7169d5e;p=thirdparty%2Fkernel%2Fstable-queue.git fixes for 5.4 Signed-off-by: Sasha Levin --- diff --git a/queue-5.4/btrfs-free-block-groups-after-free-ing-fs-trees.patch b/queue-5.4/btrfs-free-block-groups-after-free-ing-fs-trees.patch new file mode 100644 index 00000000000..fef7280214c --- /dev/null +++ b/queue-5.4/btrfs-free-block-groups-after-free-ing-fs-trees.patch @@ -0,0 +1,57 @@ +From 61485f017f9e2f99a64cbfac202f5a5e11f21874 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 21 Jan 2020 09:17:06 -0500 +Subject: btrfs: free block groups after free'ing fs trees + +From: Josef Bacik + +[ Upstream commit 4e19443da1941050b346f8fc4c368aa68413bc88 ] + +Sometimes when running generic/475 we would trip the +WARN_ON(cache->reserved) check when free'ing the block groups on umount. +This is because sometimes we don't commit the transaction because of IO +errors and thus do not cleanup the tree logs until at umount time. + +These blocks are still reserved until they are cleaned up, but they +aren't cleaned up until _after_ we do the free block groups work. Fix +this by moving the free after free'ing the fs roots, that way all of the +tree logs are cleaned up and we have a properly cleaned fs. A bunch of +loops of generic/475 confirmed this fixes the problem. 
+ +CC: stable@vger.kernel.org # 4.9+ +Signed-off-by: Josef Bacik +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/disk-io.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index 835abaabd67d6..7becc5e96f923 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -4046,11 +4046,18 @@ void close_ctree(struct btrfs_fs_info *fs_info) + invalidate_inode_pages2(fs_info->btree_inode->i_mapping); + btrfs_stop_all_workers(fs_info); + +- btrfs_free_block_groups(fs_info); +- + clear_bit(BTRFS_FS_OPEN, &fs_info->flags); + free_root_pointers(fs_info, true); + ++ /* ++ * We must free the block groups after dropping the fs_roots as we could ++ * have had an IO error and have left over tree log blocks that aren't ++ * cleaned up until the fs roots are freed. This makes the block group ++ * accounting appear to be wrong because there's pending reserved bytes, ++ * so make sure we do the block group cleanup afterwards. ++ */ ++ btrfs_free_block_groups(fs_info); ++ + iput(fs_info->btree_inode); + + #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY +-- +2.20.1 + diff --git a/queue-5.4/btrfs-use-bool-argument-in-free_root_pointers.patch b/queue-5.4/btrfs-use-bool-argument-in-free_root_pointers.patch new file mode 100644 index 00000000000..6983109c281 --- /dev/null +++ b/queue-5.4/btrfs-use-bool-argument-in-free_root_pointers.patch @@ -0,0 +1,73 @@ +From ce8be75289952f2432de954f6baffbf624b8a83c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 10 Oct 2019 10:39:25 +0800 +Subject: btrfs: use bool argument in free_root_pointers() + +From: Anand Jain + +[ Upstream commit 4273eaff9b8d5e141113a5bdf9628c02acf3afe5 ] + +We don't need int argument bool shall do in free_root_pointers(). And +rename the argument as it confused two people. 
+ +Reviewed-by: Qu Wenruo +Signed-off-by: Anand Jain +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/disk-io.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index 68266928a4aa7..835abaabd67d6 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -2016,7 +2016,7 @@ static void free_root_extent_buffers(struct btrfs_root *root) + } + + /* helper to cleanup tree roots */ +-static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) ++static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root) + { + free_root_extent_buffers(info->tree_root); + +@@ -2025,7 +2025,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) + free_root_extent_buffers(info->csum_root); + free_root_extent_buffers(info->quota_root); + free_root_extent_buffers(info->uuid_root); +- if (chunk_root) ++ if (free_chunk_root) + free_root_extent_buffers(info->chunk_root); + free_root_extent_buffers(info->free_space_root); + } +@@ -3323,7 +3323,7 @@ int open_ctree(struct super_block *sb, + btrfs_put_block_group_cache(fs_info); + + fail_tree_roots: +- free_root_pointers(fs_info, 1); ++ free_root_pointers(fs_info, true); + invalidate_inode_pages2(fs_info->btree_inode->i_mapping); + + fail_sb_buffer: +@@ -3355,7 +3355,7 @@ int open_ctree(struct super_block *sb, + if (!btrfs_test_opt(fs_info, USEBACKUPROOT)) + goto fail_tree_roots; + +- free_root_pointers(fs_info, 0); ++ free_root_pointers(fs_info, false); + + /* don't use the log in recovery mode, it won't be valid */ + btrfs_set_super_log_root(disk_super, 0); +@@ -4049,7 +4049,7 @@ void close_ctree(struct btrfs_fs_info *fs_info) + btrfs_free_block_groups(fs_info); + + clear_bit(BTRFS_FS_OPEN, &fs_info->flags); +- free_root_pointers(fs_info, 1); ++ free_root_pointers(fs_info, true); + + iput(fs_info->btree_inode); + +-- +2.20.1 + diff --git 
a/queue-5.4/drm-dp_mst-remove-vcpi-while-disabling-topology-mgr.patch b/queue-5.4/drm-dp_mst-remove-vcpi-while-disabling-topology-mgr.patch new file mode 100644 index 00000000000..a5b81a670b2 --- /dev/null +++ b/queue-5.4/drm-dp_mst-remove-vcpi-while-disabling-topology-mgr.patch @@ -0,0 +1,92 @@ +From 149296b2439f9e86545fac26358882efd86da7c9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Dec 2019 17:00:43 +0800 +Subject: drm/dp_mst: Remove VCPI while disabling topology mgr + +From: Wayne Lin + +[ Upstream commit 64e62bdf04ab8529f45ed0a85122c703035dec3a ] + +[Why] + +This patch is trying to address the issue observed when hotplug DP +daisy chain monitors. + +e.g. +src-mstb-mstb-sst -> src (unplug) mstb-mstb-sst -> src-mstb-mstb-sst +(plug in again) + +Once unplug a DP MST capable device, driver will call +drm_dp_mst_topology_mgr_set_mst() to disable MST. In this function, +it cleans data of topology manager while disabling mst_state. However, +it doesn't clean up the proposed_vcpis of topology manager. +If proposed_vcpi is not reset, once plug in MST daisy chain monitors +later, code will fail at checking port validation while trying to +allocate payloads. + +When MST capable device is plugged in again and try to allocate +payloads by calling drm_dp_update_payload_part1(), this +function will iterate over all proposed virtual channels to see if +any proposed VCPI's num_slots is greater than 0. If any proposed +VCPI's num_slots is greater than 0 and the port which the +specific virtual channel directed to is not in the topology, code then +fails at the port validation. Since there are stale VCPI allocations +from the previous topology enablement in proposed_vcpi[], code will fail +at port validation and return EINVAL. + +[How] + +Clean up the data of stale proposed_vcpi[] and reset mgr->proposed_vcpis +to NULL while disabling mst in drm_dp_mst_topology_mgr_set_mst(). 
+ +Changes since v1: +*Add on more details in commit message to describe the issue which the +patch is trying to fix + +Signed-off-by: Wayne Lin +[added cc to stable] +Signed-off-by: Lyude Paul +Link: https://patchwork.freedesktop.org/patch/msgid/20191205090043.7580-1-Wayne.Lin@amd.com +Cc: # v3.17+ +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/drm_dp_mst_topology.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c +index a48a4c21b1b38..c5e9e2305fffc 100644 +--- a/drivers/gpu/drm/drm_dp_mst_topology.c ++++ b/drivers/gpu/drm/drm_dp_mst_topology.c +@@ -2694,6 +2694,7 @@ static bool drm_dp_get_vc_payload_bw(int dp_link_bw, + int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool mst_state) + { + int ret = 0; ++ int i = 0; + struct drm_dp_mst_branch *mstb = NULL; + + mutex_lock(&mgr->lock); +@@ -2754,10 +2755,21 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms + /* this can fail if the device is gone */ + drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, 0); + ret = 0; ++ mutex_lock(&mgr->payload_lock); + memset(mgr->payloads, 0, mgr->max_payloads * sizeof(struct drm_dp_payload)); + mgr->payload_mask = 0; + set_bit(0, &mgr->payload_mask); ++ for (i = 0; i < mgr->max_payloads; i++) { ++ struct drm_dp_vcpi *vcpi = mgr->proposed_vcpis[i]; ++ ++ if (vcpi) { ++ vcpi->vcpi = 0; ++ vcpi->num_slots = 0; ++ } ++ mgr->proposed_vcpis[i] = NULL; ++ } + mgr->vcpi_mask = 0; ++ mutex_unlock(&mgr->payload_lock); + } + + out_unlock: +-- +2.20.1 + diff --git a/queue-5.4/kvm-nvmx-vmread-should-not-set-rflags-to-specify-suc.patch b/queue-5.4/kvm-nvmx-vmread-should-not-set-rflags-to-specify-suc.patch new file mode 100644 index 00000000000..4961ea1b088 --- /dev/null +++ b/queue-5.4/kvm-nvmx-vmread-should-not-set-rflags-to-specify-suc.patch @@ -0,0 +1,43 @@ +From 94ed217445d05f7ba9be36dd0615c7b72bd00d45 Mon Sep 17 00:00:00 2001 +From: Sasha 
Levin +Date: Sat, 28 Dec 2019 14:25:24 +0800 +Subject: KVM: nVMX: vmread should not set rflags to specify success in case of + #PF + +From: Miaohe Lin + +[ Upstream commit a4d956b9390418623ae5d07933e2679c68b6f83c ] + +In case writing to vmread destination operand result in a #PF, vmread +should not call nested_vmx_succeed() to set rflags to specify success. +Similar to as done in VMPTRST (See handle_vmptrst()). + +Reviewed-by: Liran Alon +Signed-off-by: Miaohe Lin +Cc: stable@vger.kernel.org +Reviewed-by: Sean Christopherson +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/vmx/nested.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c +index d0523741fb037..931d3b5f3acd4 100644 +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -4663,8 +4663,10 @@ static int handle_vmread(struct kvm_vcpu *vcpu) + vmx_instruction_info, true, len, &gva)) + return 1; + /* _system ok, nested_vmx_check_permission has verified cpl=0 */ +- if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e)) ++ if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e)) { + kvm_inject_page_fault(vcpu, &e); ++ return 1; ++ } + } + + return nested_vmx_succeed(vcpu); +-- +2.20.1 + diff --git a/queue-5.4/kvm-play-nice-with-read-only-memslots-when-querying-.patch b/queue-5.4/kvm-play-nice-with-read-only-memslots-when-querying-.patch new file mode 100644 index 00000000000..ec62d75f61c --- /dev/null +++ b/queue-5.4/kvm-play-nice-with-read-only-memslots-when-querying-.patch @@ -0,0 +1,46 @@ +From 3dd1eb528e0e4e66be6ca17bffd484fd670b49ee Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 8 Jan 2020 12:24:38 -0800 +Subject: KVM: Play nice with read-only memslots when querying host page size + +From: Sean Christopherson + +[ Upstream commit 42cde48b2d39772dba47e680781a32a6c4b7dc33 ] + +Avoid the "writable" check in __gfn_to_hva_many(), which will always fail +on 
read-only memslots due to gfn_to_hva() assuming writes. Functionally, +this allows x86 to create large mappings for read-only memslots that +are backed by HugeTLB mappings. + +Note, the changelog for commit 05da45583de9 ("KVM: MMU: large page +support") states "If the largepage contains write-protected pages, a +large pte is not used.", but "write-protected" refers to pages that are +temporarily read-only, e.g. read-only memslots didn't even exist at the +time. + +Fixes: 4d8b81abc47b ("KVM: introduce readonly memslot") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +[Redone using kvm_vcpu_gfn_to_memslot_prot. - Paolo] +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + virt/kvm/kvm_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +index 877ce955b99cf..b5ea1bafe513c 100644 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -1401,7 +1401,7 @@ unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn) + + size = PAGE_SIZE; + +- addr = kvm_vcpu_gfn_to_hva(vcpu, gfn); ++ addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gfn, NULL); + if (kvm_is_error_hva(addr)) + return PAGE_SIZE; + +-- +2.20.1 + diff --git a/queue-5.4/kvm-use-vcpu-specific-gva-hva-translation-when-query.patch b/queue-5.4/kvm-use-vcpu-specific-gva-hva-translation-when-query.patch new file mode 100644 index 00000000000..2094cb97406 --- /dev/null +++ b/queue-5.4/kvm-use-vcpu-specific-gva-hva-translation-when-query.patch @@ -0,0 +1,103 @@ +From 333f404a8192aabb67094c9ab213609235e2d135 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 8 Jan 2020 12:24:37 -0800 +Subject: KVM: Use vcpu-specific gva->hva translation when querying host page + size + +From: Sean Christopherson + +[ Upstream commit f9b84e19221efc5f493156ee0329df3142085f28 ] + +Use kvm_vcpu_gfn_to_hva() when retrieving the host page size so that the +correct set of memslots is used when handling x86 page faults in SMM. 
+ +Fixes: 54bf36aac520 ("KVM: x86: use vcpu-specific functions to read/write/translate GFNs") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + arch/powerpc/kvm/book3s_xive_native.c | 2 +- + arch/x86/kvm/mmu.c | 6 +++--- + include/linux/kvm_host.h | 2 +- + virt/kvm/kvm_main.c | 4 ++-- + 4 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c +index 5a3373e06e60b..235d57d6c205e 100644 +--- a/arch/powerpc/kvm/book3s_xive_native.c ++++ b/arch/powerpc/kvm/book3s_xive_native.c +@@ -638,7 +638,7 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, + srcu_idx = srcu_read_lock(&kvm->srcu); + gfn = gpa_to_gfn(kvm_eq.qaddr); + +- page_size = kvm_host_page_size(kvm, gfn); ++ page_size = kvm_host_page_size(vcpu, gfn); + if (1ull << kvm_eq.qshift > page_size) { + srcu_read_unlock(&kvm->srcu, srcu_idx); + pr_warn("Incompatible host page size %lx!\n", page_size); +diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c +index 46070da9e08f8..518100ea5ef47 100644 +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -1286,12 +1286,12 @@ static bool mmu_gfn_lpage_is_disallowed(struct kvm_vcpu *vcpu, gfn_t gfn, + return __mmu_gfn_lpage_is_disallowed(gfn, level, slot); + } + +-static int host_mapping_level(struct kvm *kvm, gfn_t gfn) ++static int host_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn) + { + unsigned long page_size; + int i, ret = 0; + +- page_size = kvm_host_page_size(kvm, gfn); ++ page_size = kvm_host_page_size(vcpu, gfn); + + for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { + if (page_size >= KVM_HPAGE_SIZE(i)) +@@ -1341,7 +1341,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn, + if (unlikely(*force_pt_level)) + return PT_PAGE_TABLE_LEVEL; + +- host_level = host_mapping_level(vcpu->kvm, large_gfn); ++ host_level = host_mapping_level(vcpu, 
large_gfn); + + if (host_level == PT_PAGE_TABLE_LEVEL) + return host_level; +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index 7d1564c408972..b81f0f1ded5f9 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -751,7 +751,7 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); + int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); + struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); + bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); +-unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); ++unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn); + void mark_page_dirty(struct kvm *kvm, gfn_t gfn); + + struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c +index 6f0287ad927ee..877ce955b99cf 100644 +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -1394,14 +1394,14 @@ bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) + } + EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); + +-unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn) ++unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn) + { + struct vm_area_struct *vma; + unsigned long addr, size; + + size = PAGE_SIZE; + +- addr = gfn_to_hva(kvm, gfn); ++ addr = kvm_vcpu_gfn_to_hva(vcpu, gfn); + if (kvm_is_error_hva(addr)) + return PAGE_SIZE; + +-- +2.20.1 + diff --git a/queue-5.4/kvm-x86-fix-overlap-between-spte_mmio_mask-and-gener.patch b/queue-5.4/kvm-x86-fix-overlap-between-spte_mmio_mask-and-gener.patch new file mode 100644 index 00000000000..bd9f083d55f --- /dev/null +++ b/queue-5.4/kvm-x86-fix-overlap-between-spte_mmio_mask-and-gener.patch @@ -0,0 +1,72 @@ +From f9b490c807854b02b1af757256fe41c95cb0859d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 18 Jan 2020 20:09:03 +0100 +Subject: KVM: x86: fix overlap between SPTE_MMIO_MASK and generation + +From: Paolo Bonzini + +[ Upstream commit 
56871d444bc4d7ea66708775e62e2e0926384dbc ] + +The SPTE_MMIO_MASK overlaps with the bits used to track MMIO +generation number. A high enough generation number would overwrite the +SPTE_SPECIAL_MASK region and cause the MMIO SPTE to be misinterpreted. + +Likewise, setting bits 52 and 53 would also cause an incorrect generation +number to be read from the PTE, though this was partially mitigated by the +(useless if it weren't for the bug) removal of SPTE_SPECIAL_MASK from +the spte in get_mmio_spte_generation. Drop that removal, and replace +it with a compile-time assertion. + +Fixes: 6eeb4ef049e7 ("KVM: x86: assign two bits to track SPTE kinds") +Reported-by: Ben Gardon +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/mmu.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c +index d7aa34bb318a5..46070da9e08f8 100644 +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -418,22 +418,24 @@ static inline bool is_access_track_spte(u64 spte) + * requires a full MMU zap). The flag is instead explicitly queried when + * checking for MMIO spte cache hits. 
+ */ +-#define MMIO_SPTE_GEN_MASK GENMASK_ULL(18, 0) ++#define MMIO_SPTE_GEN_MASK GENMASK_ULL(17, 0) + + #define MMIO_SPTE_GEN_LOW_START 3 + #define MMIO_SPTE_GEN_LOW_END 11 + #define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \ + MMIO_SPTE_GEN_LOW_START) + +-#define MMIO_SPTE_GEN_HIGH_START 52 +-#define MMIO_SPTE_GEN_HIGH_END 61 ++#define MMIO_SPTE_GEN_HIGH_START PT64_SECOND_AVAIL_BITS_SHIFT ++#define MMIO_SPTE_GEN_HIGH_END 62 + #define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \ + MMIO_SPTE_GEN_HIGH_START) ++ + static u64 generation_mmio_spte_mask(u64 gen) + { + u64 mask; + + WARN_ON(gen & ~MMIO_SPTE_GEN_MASK); ++ BUILD_BUG_ON((MMIO_SPTE_GEN_HIGH_MASK | MMIO_SPTE_GEN_LOW_MASK) & SPTE_SPECIAL_MASK); + + mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK; + mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK; +@@ -444,8 +446,6 @@ static u64 get_mmio_spte_generation(u64 spte) + { + u64 gen; + +- spte &= ~shadow_mmio_mask; +- + gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START; + gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START; + return gen; +-- +2.20.1 + diff --git a/queue-5.4/kvm-x86-mmu-apply-max-pa-check-for-mmio-sptes-to-32-.patch b/queue-5.4/kvm-x86-mmu-apply-max-pa-check-for-mmio-sptes-to-32-.patch new file mode 100644 index 00000000000..9c7e9310840 --- /dev/null +++ b/queue-5.4/kvm-x86-mmu-apply-max-pa-check-for-mmio-sptes-to-32-.patch @@ -0,0 +1,46 @@ +From d2ec8bee9b100de0eef44cfa4073f0e623432fc0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 7 Jan 2020 16:12:10 -0800 +Subject: KVM: x86/mmu: Apply max PA check for MMIO sptes to 32-bit KVM + +From: Sean Christopherson + +[ Upstream commit e30a7d623dccdb3f880fbcad980b0cb589a1da45 ] + +Remove the bogus 64-bit only condition from the check that disables MMIO +spte optimization when the system supports the max PA, i.e. doesn't have +any reserved PA bits. 
32-bit KVM always uses PAE paging for the shadow +MMU, and per Intel's SDM: + + PAE paging translates 32-bit linear addresses to 52-bit physical + addresses. + +The kernel's restrictions on max physical addresses are limits on how +much memory the kernel can reasonably use, not what physical addresses +are supported by hardware. + +Fixes: ce88decffd17 ("KVM: MMU: mmio page fault support") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/mmu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c +index 2ce9da58611ed..3644ac215567b 100644 +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -6249,7 +6249,7 @@ static void kvm_set_mmio_spte_mask(void) + * If reserved bit is not supported, clear the present bit to disable + * mmio page fault. + */ +- if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52) ++ if (shadow_phys_bits == 52) + mask &= ~1ull; + + kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK); +-- +2.20.1 + diff --git a/queue-5.4/kvm-x86-use-cpuid-to-locate-host-page-table-reserved.patch b/queue-5.4/kvm-x86-use-cpuid-to-locate-host-page-table-reserved.patch new file mode 100644 index 00000000000..f0a0fb102c1 --- /dev/null +++ b/queue-5.4/kvm-x86-use-cpuid-to-locate-host-page-table-reserved.patch @@ -0,0 +1,57 @@ +From d2978d773ea6b7bc495fe4ce1924070794a58db8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Dec 2019 15:50:27 +0100 +Subject: KVM: x86: use CPUID to locate host page table reserved bits + +From: Paolo Bonzini + +[ Upstream commit 7adacf5eb2d2048045d9fd8fdab861fd9e7e2e96 ] + +The comment in kvm_get_shadow_phys_bits refers to MKTME, but the same is actually +true of SME and SEV. Just use CPUID[0x8000_0008].EAX[7:0] unconditionally if +available, it is simplest and works even if memory is not encrypted. 
+ +Cc: stable@vger.kernel.org +Reported-by: Tom Lendacky +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/mmu.c | 20 ++++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c +index 3644ac215567b..d05c10651398f 100644 +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -538,16 +538,20 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); + static u8 kvm_get_shadow_phys_bits(void) + { + /* +- * boot_cpu_data.x86_phys_bits is reduced when MKTME is detected +- * in CPU detection code, but MKTME treats those reduced bits as +- * 'keyID' thus they are not reserved bits. Therefore for MKTME +- * we should still return physical address bits reported by CPUID. ++ * boot_cpu_data.x86_phys_bits is reduced when MKTME or SME are detected ++ * in CPU detection code, but the processor treats those reduced bits as ++ * 'keyID' thus they are not reserved bits. Therefore KVM needs to look at ++ * the physical address bits reported by CPUID. + */ +- if (!boot_cpu_has(X86_FEATURE_TME) || +- WARN_ON_ONCE(boot_cpu_data.extended_cpuid_level < 0x80000008)) +- return boot_cpu_data.x86_phys_bits; ++ if (likely(boot_cpu_data.extended_cpuid_level >= 0x80000008)) ++ return cpuid_eax(0x80000008) & 0xff; + +- return cpuid_eax(0x80000008) & 0xff; ++ /* ++ * Quite weird to have VMX or SVM but not MAXPHYADDR; probably a VM with ++ * custom CPUID. Proceed with whatever the kernel found since these features ++ * aren't virtualizable (SME/SEV also require CPUIDs higher than 0x80000008). 
++ */ ++ return boot_cpu_data.x86_phys_bits; + } + + static void kvm_mmu_reset_all_pte_masks(void) +-- +2.20.1 + diff --git a/queue-5.4/kvm-x86-use-gpa_t-for-cr2-gpa-to-fix-tdp-support-on-.patch b/queue-5.4/kvm-x86-use-gpa_t-for-cr2-gpa-to-fix-tdp-support-on-.patch new file mode 100644 index 00000000000..ccac174baba --- /dev/null +++ b/queue-5.4/kvm-x86-use-gpa_t-for-cr2-gpa-to-fix-tdp-support-on-.patch @@ -0,0 +1,668 @@ +From 38f54b3ce187f6f145e3205911d4daa73e13649e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 Dec 2019 15:57:14 -0800 +Subject: KVM: x86: Use gpa_t for cr2/gpa to fix TDP support on 32-bit KVM + +From: Sean Christopherson + +[ Upstream commit 736c291c9f36b07f8889c61764c28edce20e715d ] + +Convert a plethora of parameters and variables in the MMU and page fault +flows from type gva_t to gpa_t to properly handle TDP on 32-bit KVM. + +Thanks to PSE and PAE paging, 32-bit kernels can access 64-bit physical +addresses. When TDP is enabled, the fault address is a guest physical +address and thus can be a 64-bit value, even when both KVM and its guest +are using 32-bit virtual addressing, e.g. VMX's VMCS.GUEST_PHYSICAL is a +64-bit field, not a natural width field. + +Using a gva_t for the fault address means KVM will incorrectly drop the +upper 32-bits of the GPA. Ditto for gva_to_gpa() when it is used to +translate L2 GPAs to L1 GPAs. + +Opportunistically rename variables and parameters to better reflect the +dual address modes, e.g. use "cr2_or_gpa" for fault addresses and plain +"addr" instead of "vaddr" when the address may be either a GVA or an L2 +GPA. Similarly, use "gpa" in the nonpaging_page_fault() flows to avoid +a confusing "gpa_t gva" declaration; this also sets the stage for a +future patch to combine nonpaging_page_fault() and tdp_page_fault() with +minimal churn. + +Sprinkle in a few comments to document flows where an address is known +to be a GVA and thus can be safely truncated to a 32-bit value. 
Add +WARNs in kvm_handle_page_fault() and FNAME(gva_to_gpa_nested)() to help +document such cases and detect bugs. + +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + arch/x86/include/asm/kvm_host.h | 8 ++-- + arch/x86/kvm/mmu.c | 69 +++++++++++++++++++-------------- + arch/x86/kvm/mmutrace.h | 12 +++--- + arch/x86/kvm/paging_tmpl.h | 25 +++++++----- + arch/x86/kvm/x86.c | 40 +++++++++---------- + arch/x86/kvm/x86.h | 2 +- + include/linux/kvm_host.h | 6 +-- + virt/kvm/async_pf.c | 10 ++--- + 8 files changed, 94 insertions(+), 78 deletions(-) + +diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h +index 10434cf402dee..c1ed054c103c3 100644 +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -380,12 +380,12 @@ struct kvm_mmu { + void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); + unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); + u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); +- int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, ++ int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err, + bool prefault); + void (*inject_page_fault)(struct kvm_vcpu *vcpu, + struct x86_exception *fault); +- gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, +- struct x86_exception *exception); ++ gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t gva_or_gpa, ++ u32 access, struct x86_exception *exception); + gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, + struct x86_exception *exception); + int (*sync_page)(struct kvm_vcpu *vcpu, +@@ -1451,7 +1451,7 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu); + + int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); + +-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code, ++int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, + void *insn, int insn_len); + void kvm_mmu_invlpg(struct kvm_vcpu 
*vcpu, gva_t gva); + void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); +diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c +index d05c10651398f..d7aa34bb318a5 100644 +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -3532,7 +3532,7 @@ static bool is_access_allowed(u32 fault_err_code, u64 spte) + * - true: let the vcpu to access on the same address again. + * - false: let the real page fault path to fix it. + */ +-static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, ++static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int level, + u32 error_code) + { + struct kvm_shadow_walk_iterator iterator; +@@ -3552,7 +3552,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, + do { + u64 new_spte; + +- for_each_shadow_entry_lockless(vcpu, gva, iterator, spte) ++ for_each_shadow_entry_lockless(vcpu, cr2_or_gpa, iterator, spte) + if (!is_shadow_present_pte(spte) || + iterator.level < level) + break; +@@ -3630,7 +3630,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, + + } while (true); + +- trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep, ++ trace_fast_page_fault(vcpu, cr2_or_gpa, error_code, iterator.sptep, + spte, fault_handled); + walk_shadow_page_lockless_end(vcpu); + +@@ -3638,10 +3638,11 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, + } + + static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, +- gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable); ++ gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write, ++ bool *writable); + static int make_mmu_pages_available(struct kvm_vcpu *vcpu); + +-static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, ++static int nonpaging_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, + gfn_t gfn, bool prefault) + { + int r; +@@ -3667,16 +3668,16 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, + gfn &= 
~(KVM_PAGES_PER_HPAGE(level) - 1); + } + +- if (fast_page_fault(vcpu, v, level, error_code)) ++ if (fast_page_fault(vcpu, gpa, level, error_code)) + return RET_PF_RETRY; + + mmu_seq = vcpu->kvm->mmu_notifier_seq; + smp_rmb(); + +- if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable)) ++ if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable)) + return RET_PF_RETRY; + +- if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r)) ++ if (handle_abnormal_pfn(vcpu, gpa, gfn, pfn, ACC_ALL, &r)) + return r; + + r = RET_PF_RETRY; +@@ -3687,7 +3688,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, + goto out_unlock; + if (likely(!force_pt_level)) + transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); +- r = __direct_map(vcpu, v, write, map_writable, level, pfn, ++ r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, + prefault, false); + out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); +@@ -3985,7 +3986,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) + } + EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots); + +-static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, ++static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gpa_t vaddr, + u32 access, struct x86_exception *exception) + { + if (exception) +@@ -3993,7 +3994,7 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, + return vaddr; + } + +-static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, ++static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gpa_t vaddr, + u32 access, + struct x86_exception *exception) + { +@@ -4153,13 +4154,14 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) + walk_shadow_page_lockless_end(vcpu); + } + +-static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, ++static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, + u32 error_code, bool prefault) + { +- gfn_t gfn = gva >> PAGE_SHIFT; ++ gfn_t gfn = gpa >> PAGE_SHIFT; 
+ int r; + +- pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); ++ /* Note, paging is disabled, ergo gva == gpa. */ ++ pgprintk("%s: gva %lx error %x\n", __func__, gpa, error_code); + + if (page_fault_handle_page_track(vcpu, error_code, gfn)) + return RET_PF_EMULATE; +@@ -4171,11 +4173,12 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, + MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)); + + +- return nonpaging_map(vcpu, gva & PAGE_MASK, ++ return nonpaging_map(vcpu, gpa & PAGE_MASK, + error_code, gfn, prefault); + } + +-static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) ++static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, ++ gfn_t gfn) + { + struct kvm_arch_async_pf arch; + +@@ -4184,11 +4187,13 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) + arch.direct_map = vcpu->arch.mmu->direct_map; + arch.cr3 = vcpu->arch.mmu->get_cr3(vcpu); + +- return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); ++ return kvm_setup_async_pf(vcpu, cr2_or_gpa, ++ kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); + } + + static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, +- gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable) ++ gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write, ++ bool *writable) + { + struct kvm_memory_slot *slot; + bool async; +@@ -4208,12 +4213,12 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, + return false; /* *pfn has correct page already */ + + if (!prefault && kvm_can_do_async_pf(vcpu)) { +- trace_kvm_try_async_get_page(gva, gfn); ++ trace_kvm_try_async_get_page(cr2_or_gpa, gfn); + if (kvm_find_async_pf_gfn(vcpu, gfn)) { +- trace_kvm_async_pf_doublefault(gva, gfn); ++ trace_kvm_async_pf_doublefault(cr2_or_gpa, gfn); + kvm_make_request(KVM_REQ_APF_HALT, vcpu); + return true; +- } else if (kvm_arch_setup_async_pf(vcpu, gva, gfn)) ++ } else if (kvm_arch_setup_async_pf(vcpu, 
cr2_or_gpa, gfn)) + return true; + } + +@@ -4226,6 +4231,12 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, + { + int r = 1; + ++#ifndef CONFIG_X86_64 ++ /* A 64-bit CR2 should be impossible on 32-bit KVM. */ ++ if (WARN_ON_ONCE(fault_address >> 32)) ++ return -EFAULT; ++#endif ++ + vcpu->arch.l1tf_flush_l1d = true; + switch (vcpu->arch.apf.host_apf_reason) { + default: +@@ -4263,7 +4274,7 @@ check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level) + return kvm_mtrr_check_gfn_range_consistency(vcpu, gfn, page_num); + } + +-static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, ++static int tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, + bool prefault) + { + kvm_pfn_t pfn; +@@ -5520,7 +5531,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu) + return 0; + } + +-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, ++int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, + void *insn, int insn_len) + { + int r, emulation_type = 0; +@@ -5529,18 +5540,18 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, + /* With shadow page tables, fault_address contains a GVA or nGPA. 
*/ + if (vcpu->arch.mmu->direct_map) { + vcpu->arch.gpa_available = true; +- vcpu->arch.gpa_val = cr2; ++ vcpu->arch.gpa_val = cr2_or_gpa; + } + + r = RET_PF_INVALID; + if (unlikely(error_code & PFERR_RSVD_MASK)) { +- r = handle_mmio_page_fault(vcpu, cr2, direct); ++ r = handle_mmio_page_fault(vcpu, cr2_or_gpa, direct); + if (r == RET_PF_EMULATE) + goto emulate; + } + + if (r == RET_PF_INVALID) { +- r = vcpu->arch.mmu->page_fault(vcpu, cr2, ++ r = vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa, + lower_32_bits(error_code), + false); + WARN_ON(r == RET_PF_INVALID); +@@ -5560,7 +5571,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, + */ + if (vcpu->arch.mmu->direct_map && + (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) { +- kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2)); ++ kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa)); + return 1; + } + +@@ -5575,7 +5586,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, + * explicitly shadowing L1's page tables, i.e. unprotecting something + * for L1 isn't going to magically fix whatever issue cause L2 to fail. 
+ */ +- if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu)) ++ if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu)) + emulation_type = EMULTYPE_ALLOW_RETRY; + emulate: + /* +@@ -5590,7 +5601,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, + return 1; + } + +- return x86_emulate_instruction(vcpu, cr2, emulation_type, insn, ++ return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn, + insn_len); + } + EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); +diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h +index 7ca8831c7d1a2..3c6522b84ff11 100644 +--- a/arch/x86/kvm/mmutrace.h ++++ b/arch/x86/kvm/mmutrace.h +@@ -249,13 +249,13 @@ TRACE_EVENT( + + TRACE_EVENT( + fast_page_fault, +- TP_PROTO(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, ++ TP_PROTO(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 error_code, + u64 *sptep, u64 old_spte, bool retry), +- TP_ARGS(vcpu, gva, error_code, sptep, old_spte, retry), ++ TP_ARGS(vcpu, cr2_or_gpa, error_code, sptep, old_spte, retry), + + TP_STRUCT__entry( + __field(int, vcpu_id) +- __field(gva_t, gva) ++ __field(gpa_t, cr2_or_gpa) + __field(u32, error_code) + __field(u64 *, sptep) + __field(u64, old_spte) +@@ -265,7 +265,7 @@ TRACE_EVENT( + + TP_fast_assign( + __entry->vcpu_id = vcpu->vcpu_id; +- __entry->gva = gva; ++ __entry->cr2_or_gpa = cr2_or_gpa; + __entry->error_code = error_code; + __entry->sptep = sptep; + __entry->old_spte = old_spte; +@@ -273,9 +273,9 @@ TRACE_EVENT( + __entry->retry = retry; + ), + +- TP_printk("vcpu %d gva %lx error_code %s sptep %p old %#llx" ++ TP_printk("vcpu %d gva %llx error_code %s sptep %p old %#llx" + " new %llx spurious %d fixed %d", __entry->vcpu_id, +- __entry->gva, __print_flags(__entry->error_code, "|", ++ __entry->cr2_or_gpa, __print_flags(__entry->error_code, "|", + kvm_mmu_trace_pferr_flags), __entry->sptep, + __entry->old_spte, __entry->new_spte, + __spte_satisfied(old_spte), __spte_satisfied(new_spte) 
+diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h +index 97b21e7fd013d..c1d7b866a03fa 100644 +--- a/arch/x86/kvm/paging_tmpl.h ++++ b/arch/x86/kvm/paging_tmpl.h +@@ -291,11 +291,11 @@ static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte) + } + + /* +- * Fetch a guest pte for a guest virtual address ++ * Fetch a guest pte for a guest virtual address, or for an L2's GPA. + */ + static int FNAME(walk_addr_generic)(struct guest_walker *walker, + struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, +- gva_t addr, u32 access) ++ gpa_t addr, u32 access) + { + int ret; + pt_element_t pte; +@@ -496,7 +496,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, + } + + static int FNAME(walk_addr)(struct guest_walker *walker, +- struct kvm_vcpu *vcpu, gva_t addr, u32 access) ++ struct kvm_vcpu *vcpu, gpa_t addr, u32 access) + { + return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr, + access); +@@ -611,7 +611,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, + * If the guest tries to write a write-protected page, we need to + * emulate this operation, return 1 to indicate this case. + */ +-static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, ++static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr, + struct guest_walker *gw, + int write_fault, int hlevel, + kvm_pfn_t pfn, bool map_writable, bool prefault, +@@ -765,7 +765,7 @@ FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu, + * Returns: 1 if we need to emulate the instruction, 0 otherwise, or + * a negative value on error. 
+ */ +-static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, ++static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code, + bool prefault) + { + int write_fault = error_code & PFERR_WRITE_MASK; +@@ -945,18 +945,19 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa) + spin_unlock(&vcpu->kvm->mmu_lock); + } + +-static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, ++/* Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA. */ ++static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t addr, u32 access, + struct x86_exception *exception) + { + struct guest_walker walker; + gpa_t gpa = UNMAPPED_GVA; + int r; + +- r = FNAME(walk_addr)(&walker, vcpu, vaddr, access); ++ r = FNAME(walk_addr)(&walker, vcpu, addr, access); + + if (r) { + gpa = gfn_to_gpa(walker.gfn); +- gpa |= vaddr & ~PAGE_MASK; ++ gpa |= addr & ~PAGE_MASK; + } else if (exception) + *exception = walker.fault; + +@@ -964,7 +965,8 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, + } + + #if PTTYPE != PTTYPE_EPT +-static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, ++/* Note, gva_to_gpa_nested() is only used to translate L2 GVAs. */ ++static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gpa_t vaddr, + u32 access, + struct x86_exception *exception) + { +@@ -972,6 +974,11 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, + gpa_t gpa = UNMAPPED_GVA; + int r; + ++#ifndef CONFIG_X86_64 ++ /* A 64-bit GVA should be impossible on 32-bit KVM. 
*/ ++ WARN_ON_ONCE(vaddr >> 32); ++#endif ++ + r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, access); + + if (r) { +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 92e025d1b5175..edde5ee8c6f50 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -6409,11 +6409,11 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) + return 1; + } + +-static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, ++static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + bool write_fault_to_shadow_pgtable, + int emulation_type) + { +- gpa_t gpa = cr2; ++ gpa_t gpa = cr2_or_gpa; + kvm_pfn_t pfn; + + if (!(emulation_type & EMULTYPE_ALLOW_RETRY)) +@@ -6427,7 +6427,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, + * Write permission should be allowed since only + * write access need to be emulated. + */ +- gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); ++ gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); + + /* + * If the mapping is invalid in guest, let cpu retry +@@ -6484,10 +6484,10 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, + } + + static bool retry_instruction(struct x86_emulate_ctxt *ctxt, +- unsigned long cr2, int emulation_type) ++ gpa_t cr2_or_gpa, int emulation_type) + { + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); +- unsigned long last_retry_eip, last_retry_addr, gpa = cr2; ++ unsigned long last_retry_eip, last_retry_addr, gpa = cr2_or_gpa; + + last_retry_eip = vcpu->arch.last_retry_eip; + last_retry_addr = vcpu->arch.last_retry_addr; +@@ -6516,14 +6516,14 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, + if (x86_page_table_writing_insn(ctxt)) + return false; + +- if (ctxt->eip == last_retry_eip && last_retry_addr == cr2) ++ if (ctxt->eip == last_retry_eip && last_retry_addr == cr2_or_gpa) + return false; + + vcpu->arch.last_retry_eip = ctxt->eip; +- vcpu->arch.last_retry_addr = cr2; ++ vcpu->arch.last_retry_addr = 
cr2_or_gpa; + + if (!vcpu->arch.mmu->direct_map) +- gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); ++ gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); + + kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); + +@@ -6669,11 +6669,8 @@ static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt) + return false; + } + +-int x86_emulate_instruction(struct kvm_vcpu *vcpu, +- unsigned long cr2, +- int emulation_type, +- void *insn, +- int insn_len) ++int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, ++ int emulation_type, void *insn, int insn_len) + { + int r; + struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; +@@ -6719,8 +6716,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } +- if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, +- emulation_type)) ++ if (reexecute_instruction(vcpu, cr2_or_gpa, ++ write_fault_to_spt, ++ emulation_type)) + return 1; + if (ctxt->have_exception) { + /* +@@ -6754,7 +6752,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, + return 1; + } + +- if (retry_instruction(ctxt, cr2, emulation_type)) ++ if (retry_instruction(ctxt, cr2_or_gpa, emulation_type)) + return 1; + + /* this is needed for vmware backdoor interface to work since it +@@ -6766,7 +6764,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, + + restart: + /* Save the faulting GPA (cr2) in the address field */ +- ctxt->exception.address = cr2; ++ ctxt->exception.address = cr2_or_gpa; + + r = x86_emulate_insn(ctxt); + +@@ -6774,7 +6772,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, + return 1; + + if (r == EMULATION_FAILED) { +- if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, ++ if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt, + emulation_type)) + return 1; + +@@ -10051,7 +10049,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) + work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu)) + return; + +- 
vcpu->arch.mmu->page_fault(vcpu, work->gva, 0, true); ++ vcpu->arch.mmu->page_fault(vcpu, work->cr2_or_gpa, 0, true); + } + + static inline u32 kvm_async_pf_hash_fn(gfn_t gfn) +@@ -10164,7 +10162,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, + { + struct x86_exception fault; + +- trace_kvm_async_pf_not_present(work->arch.token, work->gva); ++ trace_kvm_async_pf_not_present(work->arch.token, work->cr2_or_gpa); + kvm_add_async_pf_gfn(vcpu, work->arch.gfn); + + if (kvm_can_deliver_async_pf(vcpu) && +@@ -10199,7 +10197,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, + work->arch.token = ~0; /* broadcast wakeup */ + else + kvm_del_async_pf_gfn(vcpu, work->arch.gfn); +- trace_kvm_async_pf_ready(work->arch.token, work->gva); ++ trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa); + + if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED && + !apf_get_user(vcpu, &val)) { +diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h +index dbf7442a822b6..de6b55484876a 100644 +--- a/arch/x86/kvm/x86.h ++++ b/arch/x86/kvm/x86.h +@@ -286,7 +286,7 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); + bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, + int page_num); + bool kvm_vector_hashing_enabled(void); +-int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, ++int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + int emulation_type, void *insn, int insn_len); + + #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index abfc2fbde957c..7d1564c408972 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -204,7 +204,7 @@ struct kvm_async_pf { + struct list_head queue; + struct kvm_vcpu *vcpu; + struct mm_struct *mm; +- gva_t gva; ++ gpa_t cr2_or_gpa; + unsigned long addr; + struct kvm_arch_async_pf arch; + bool wakeup_all; +@@ -212,8 +212,8 @@ struct kvm_async_pf { + + void 
kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); + void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu); +-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, +- struct kvm_arch_async_pf *arch); ++int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, ++ unsigned long hva, struct kvm_arch_async_pf *arch); + int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); + #endif + +diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c +index 35305d6e68cc6..d8ef708a2ef67 100644 +--- a/virt/kvm/async_pf.c ++++ b/virt/kvm/async_pf.c +@@ -64,7 +64,7 @@ static void async_pf_execute(struct work_struct *work) + struct mm_struct *mm = apf->mm; + struct kvm_vcpu *vcpu = apf->vcpu; + unsigned long addr = apf->addr; +- gva_t gva = apf->gva; ++ gpa_t cr2_or_gpa = apf->cr2_or_gpa; + int locked = 1; + + might_sleep(); +@@ -92,7 +92,7 @@ static void async_pf_execute(struct work_struct *work) + * this point + */ + +- trace_kvm_async_pf_completed(addr, gva); ++ trace_kvm_async_pf_completed(addr, cr2_or_gpa); + + if (swq_has_sleeper(&vcpu->wq)) + swake_up_one(&vcpu->wq); +@@ -165,8 +165,8 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) + } + } + +-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, +- struct kvm_arch_async_pf *arch) ++int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, ++ unsigned long hva, struct kvm_arch_async_pf *arch) + { + struct kvm_async_pf *work; + +@@ -185,7 +185,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, + + work->wakeup_all = false; + work->vcpu = vcpu; +- work->gva = gva; ++ work->cr2_or_gpa = cr2_or_gpa; + work->addr = hva; + work->arch = *arch; + work->mm = current->mm; +-- +2.20.1 + diff --git a/queue-5.4/series b/queue-5.4/series index eed9b9f928f..cac44e1c9e1 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -289,3 +289,13 @@ mfd-da9062-fix-watchdog-compatible-string.patch 
mfd-rn5t618-mark-adc-control-register-volatile.patch mfd-bd70528-fix-hour-register-mask.patch x86-timer-don-t-skip-pit-setup-when-apic-is-disabled-or-in-legacy-mode.patch +btrfs-use-bool-argument-in-free_root_pointers.patch +btrfs-free-block-groups-after-free-ing-fs-trees.patch +drm-dp_mst-remove-vcpi-while-disabling-topology-mgr.patch +kvm-x86-mmu-apply-max-pa-check-for-mmio-sptes-to-32-.patch +kvm-x86-use-cpuid-to-locate-host-page-table-reserved.patch +kvm-x86-use-gpa_t-for-cr2-gpa-to-fix-tdp-support-on-.patch +kvm-x86-fix-overlap-between-spte_mmio_mask-and-gener.patch +kvm-nvmx-vmread-should-not-set-rflags-to-specify-suc.patch +kvm-use-vcpu-specific-gva-hva-translation-when-query.patch +kvm-play-nice-with-read-only-memslots-when-querying-.patch