fixes for 5.4
author Sasha Levin <sashal@kernel.org>
Sun, 9 Feb 2020 22:18:07 +0000 (17:18 -0500)
committer Sasha Levin <sashal@kernel.org>
Sun, 9 Feb 2020 22:18:07 +0000 (17:18 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-5.4/btrfs-free-block-groups-after-free-ing-fs-trees.patch [new file with mode: 0644]
queue-5.4/btrfs-use-bool-argument-in-free_root_pointers.patch [new file with mode: 0644]
queue-5.4/drm-dp_mst-remove-vcpi-while-disabling-topology-mgr.patch [new file with mode: 0644]
queue-5.4/kvm-nvmx-vmread-should-not-set-rflags-to-specify-suc.patch [new file with mode: 0644]
queue-5.4/kvm-play-nice-with-read-only-memslots-when-querying-.patch [new file with mode: 0644]
queue-5.4/kvm-use-vcpu-specific-gva-hva-translation-when-query.patch [new file with mode: 0644]
queue-5.4/kvm-x86-fix-overlap-between-spte_mmio_mask-and-gener.patch [new file with mode: 0644]
queue-5.4/kvm-x86-mmu-apply-max-pa-check-for-mmio-sptes-to-32-.patch [new file with mode: 0644]
queue-5.4/kvm-x86-use-cpuid-to-locate-host-page-table-reserved.patch [new file with mode: 0644]
queue-5.4/kvm-x86-use-gpa_t-for-cr2-gpa-to-fix-tdp-support-on-.patch [new file with mode: 0644]
queue-5.4/series

diff --git a/queue-5.4/btrfs-free-block-groups-after-free-ing-fs-trees.patch b/queue-5.4/btrfs-free-block-groups-after-free-ing-fs-trees.patch
new file mode 100644 (file)
index 0000000..fef7280
--- /dev/null
@@ -0,0 +1,57 @@
+From 61485f017f9e2f99a64cbfac202f5a5e11f21874 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 Jan 2020 09:17:06 -0500
+Subject: btrfs: free block groups after free'ing fs trees
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+[ Upstream commit 4e19443da1941050b346f8fc4c368aa68413bc88 ]
+
+Sometimes when running generic/475 we would trip the
+WARN_ON(cache->reserved) check when free'ing the block groups on umount.
+This is because sometimes we don't commit the transaction because of IO
+errors and thus do not clean up the tree logs until umount time.
+
+These blocks are still reserved until they are cleaned up, but they
+aren't cleaned up until _after_ we do the free block groups work.  Fix
+this by moving the free after free'ing the fs roots, that way all of the
+tree logs are cleaned up and we have a properly cleaned fs.  A bunch of
+loops of generic/475 confirmed this fixes the problem.
+
+CC: stable@vger.kernel.org # 4.9+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/disk-io.c | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index 835abaabd67d6..7becc5e96f923 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -4046,11 +4046,18 @@ void close_ctree(struct btrfs_fs_info *fs_info)
+       invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
+       btrfs_stop_all_workers(fs_info);
+-      btrfs_free_block_groups(fs_info);
+-
+       clear_bit(BTRFS_FS_OPEN, &fs_info->flags);
+       free_root_pointers(fs_info, true);
++      /*
++       * We must free the block groups after dropping the fs_roots as we could
++       * have had an IO error and have left over tree log blocks that aren't
++       * cleaned up until the fs roots are freed.  This makes the block group
++       * accounting appear to be wrong because there's pending reserved bytes,
++       * so make sure we do the block group cleanup afterwards.
++       */
++      btrfs_free_block_groups(fs_info);
++
+       iput(fs_info->btree_inode);
+ #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+-- 
+2.20.1
+
diff --git a/queue-5.4/btrfs-use-bool-argument-in-free_root_pointers.patch b/queue-5.4/btrfs-use-bool-argument-in-free_root_pointers.patch
new file mode 100644 (file)
index 0000000..6983109
--- /dev/null
@@ -0,0 +1,73 @@
+From ce8be75289952f2432de954f6baffbf624b8a83c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Oct 2019 10:39:25 +0800
+Subject: btrfs: use bool argument in free_root_pointers()
+
+From: Anand Jain <anand.jain@oracle.com>
+
+[ Upstream commit 4273eaff9b8d5e141113a5bdf9628c02acf3afe5 ]
+
+We don't need an int argument; bool will do in free_root_pointers().  Also
+rename the argument, as it confused two people.
+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Anand Jain <anand.jain@oracle.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/disk-io.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index 68266928a4aa7..835abaabd67d6 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -2016,7 +2016,7 @@ static void free_root_extent_buffers(struct btrfs_root *root)
+ }
+ /* helper to cleanup tree roots */
+-static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
++static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root)
+ {
+       free_root_extent_buffers(info->tree_root);
+@@ -2025,7 +2025,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
+       free_root_extent_buffers(info->csum_root);
+       free_root_extent_buffers(info->quota_root);
+       free_root_extent_buffers(info->uuid_root);
+-      if (chunk_root)
++      if (free_chunk_root)
+               free_root_extent_buffers(info->chunk_root);
+       free_root_extent_buffers(info->free_space_root);
+ }
+@@ -3323,7 +3323,7 @@ int open_ctree(struct super_block *sb,
+       btrfs_put_block_group_cache(fs_info);
+ fail_tree_roots:
+-      free_root_pointers(fs_info, 1);
++      free_root_pointers(fs_info, true);
+       invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
+ fail_sb_buffer:
+@@ -3355,7 +3355,7 @@ int open_ctree(struct super_block *sb,
+       if (!btrfs_test_opt(fs_info, USEBACKUPROOT))
+               goto fail_tree_roots;
+-      free_root_pointers(fs_info, 0);
++      free_root_pointers(fs_info, false);
+       /* don't use the log in recovery mode, it won't be valid */
+       btrfs_set_super_log_root(disk_super, 0);
+@@ -4049,7 +4049,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)
+       btrfs_free_block_groups(fs_info);
+       clear_bit(BTRFS_FS_OPEN, &fs_info->flags);
+-      free_root_pointers(fs_info, 1);
++      free_root_pointers(fs_info, true);
+       iput(fs_info->btree_inode);
+-- 
+2.20.1
+
diff --git a/queue-5.4/drm-dp_mst-remove-vcpi-while-disabling-topology-mgr.patch b/queue-5.4/drm-dp_mst-remove-vcpi-while-disabling-topology-mgr.patch
new file mode 100644 (file)
index 0000000..a5b81a6
--- /dev/null
@@ -0,0 +1,92 @@
+From 149296b2439f9e86545fac26358882efd86da7c9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Dec 2019 17:00:43 +0800
+Subject: drm/dp_mst: Remove VCPI while disabling topology mgr
+
+From: Wayne Lin <Wayne.Lin@amd.com>
+
+[ Upstream commit 64e62bdf04ab8529f45ed0a85122c703035dec3a ]
+
+[Why]
+
+This patch addresses an issue observed when hotplugging DP daisy-chain
+monitors.
+
+e.g.
+src-mstb-mstb-sst -> src (unplug) mstb-mstb-sst -> src-mstb-mstb-sst
+(plug in again)
+
+Once a DP MST capable device is unplugged, the driver calls
+drm_dp_mst_topology_mgr_set_mst() to disable MST. In this function,
+it cleans up the topology manager's data while disabling mst_state.
+However, it doesn't clean up the topology manager's proposed_vcpis.
+If proposed_vcpi is not reset, then once MST daisy-chain monitors are
+plugged in again later, the code will fail at port validation while
+trying to allocate payloads.
+
+When the MST capable device is plugged in again and tries to allocate
+payloads by calling drm_dp_update_payload_part1(), this function
+iterates over all proposed virtual channels to see if any proposed
+VCPI's num_slots is greater than 0. If any proposed VCPI's num_slots is
+greater than 0 and the port that the specific virtual channel is
+directed to is not in the topology, the code then fails at port
+validation. Since there are stale VCPI allocations from the previous
+topology enablement in proposed_vcpi[], the code will fail at port
+validation and return EINVAL.
+
+[How]
+
+Clean up the data of stale proposed_vcpi[] and reset mgr->proposed_vcpis
+to NULL while disabling mst in drm_dp_mst_topology_mgr_set_mst().
+
+Changes since v1:
+* Add more details in the commit message to describe the issue which the
+patch is trying to fix
+
+Signed-off-by: Wayne Lin <Wayne.Lin@amd.com>
+[added cc to stable]
+Signed-off-by: Lyude Paul <lyude@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20191205090043.7580-1-Wayne.Lin@amd.com
+Cc: <stable@vger.kernel.org> # v3.17+
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/drm_dp_mst_topology.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
+index a48a4c21b1b38..c5e9e2305fffc 100644
+--- a/drivers/gpu/drm/drm_dp_mst_topology.c
++++ b/drivers/gpu/drm/drm_dp_mst_topology.c
+@@ -2694,6 +2694,7 @@ static bool drm_dp_get_vc_payload_bw(int dp_link_bw,
+ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool mst_state)
+ {
+       int ret = 0;
++      int i = 0;
+       struct drm_dp_mst_branch *mstb = NULL;
+       mutex_lock(&mgr->lock);
+@@ -2754,10 +2755,21 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms
+               /* this can fail if the device is gone */
+               drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, 0);
+               ret = 0;
++              mutex_lock(&mgr->payload_lock);
+               memset(mgr->payloads, 0, mgr->max_payloads * sizeof(struct drm_dp_payload));
+               mgr->payload_mask = 0;
+               set_bit(0, &mgr->payload_mask);
++              for (i = 0; i < mgr->max_payloads; i++) {
++                      struct drm_dp_vcpi *vcpi = mgr->proposed_vcpis[i];
++
++                      if (vcpi) {
++                              vcpi->vcpi = 0;
++                              vcpi->num_slots = 0;
++                      }
++                      mgr->proposed_vcpis[i] = NULL;
++              }
+               mgr->vcpi_mask = 0;
++              mutex_unlock(&mgr->payload_lock);
+       }
+ out_unlock:
+-- 
+2.20.1
+
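As an aside, here is a minimal stand-alone C sketch of the pattern the patch above adds (this is not the DRM code; the struct, lock, and constant names are invented for illustration): when the manager is disabled, every stale per-slot proposal is cleared under the same lock the payload-update path takes, so a later re-enable starts from a clean slate instead of tripping over entries that point at ports from the old topology.

#include <pthread.h>
#include <stddef.h>

#define MAX_PAYLOADS 8                 /* illustrative limit */

struct proposal {
        int vcpi;
        int num_slots;
};

struct topology_mgr {
        pthread_mutex_t payload_lock;
        struct proposal *proposed[MAX_PAYLOADS];
        unsigned long vcpi_mask;
};

/* Disable path: wipe stale proposals so a later enable/allocation pass
 * never sees leftovers from the previous topology. */
static void mgr_disable_cleanup(struct topology_mgr *mgr)
{
        pthread_mutex_lock(&mgr->payload_lock);
        for (size_t i = 0; i < MAX_PAYLOADS; i++) {
                struct proposal *p = mgr->proposed[i];

                if (p) {
                        p->vcpi = 0;
                        p->num_slots = 0;
                }
                mgr->proposed[i] = NULL;
        }
        mgr->vcpi_mask = 0;
        pthread_mutex_unlock(&mgr->payload_lock);
}

int main(void)
{
        static struct proposal slot0 = { .vcpi = 1, .num_slots = 2 };
        static struct topology_mgr mgr = {
                .payload_lock = PTHREAD_MUTEX_INITIALIZER,
                .proposed     = { &slot0 },
                .vcpi_mask    = 0x1,
        };

        mgr_disable_cleanup(&mgr);
        return mgr.proposed[0] != NULL;   /* exits 0 when the slate is clean */
}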
diff --git a/queue-5.4/kvm-nvmx-vmread-should-not-set-rflags-to-specify-suc.patch b/queue-5.4/kvm-nvmx-vmread-should-not-set-rflags-to-specify-suc.patch
new file mode 100644 (file)
index 0000000..4961ea1
--- /dev/null
@@ -0,0 +1,43 @@
+From 94ed217445d05f7ba9be36dd0615c7b72bd00d45 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 28 Dec 2019 14:25:24 +0800
+Subject: KVM: nVMX: vmread should not set rflags to specify success in case of
+ #PF
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+[ Upstream commit a4d956b9390418623ae5d07933e2679c68b6f83c ]
+
+In case writing to the vmread destination operand results in a #PF, vmread
+should not call nested_vmx_succeed() to set rflags to specify success.
+This is similar to what is done in VMPTRST (see handle_vmptrst()).
+
+Reviewed-by: Liran Alon <liran.alon@oracle.com>
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Sean Christopherson <sean.j.christopherson@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/nested.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
+index d0523741fb037..931d3b5f3acd4 100644
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -4663,8 +4663,10 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
+                               vmx_instruction_info, true, len, &gva))
+                       return 1;
+               /* _system ok, nested_vmx_check_permission has verified cpl=0 */
+-              if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e))
++              if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e)) {
+                       kvm_inject_page_fault(vcpu, &e);
++                      return 1;
++              }
+       }
+       return nested_vmx_succeed(vcpu);
+-- 
+2.20.1
+
diff --git a/queue-5.4/kvm-play-nice-with-read-only-memslots-when-querying-.patch b/queue-5.4/kvm-play-nice-with-read-only-memslots-when-querying-.patch
new file mode 100644 (file)
index 0000000..ec62d75
--- /dev/null
@@ -0,0 +1,46 @@
+From 3dd1eb528e0e4e66be6ca17bffd484fd670b49ee Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 8 Jan 2020 12:24:38 -0800
+Subject: KVM: Play nice with read-only memslots when querying host page size
+
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+
+[ Upstream commit 42cde48b2d39772dba47e680781a32a6c4b7dc33 ]
+
+Avoid the "writable" check in __gfn_to_hva_many(), which will always fail
+on read-only memslots due to gfn_to_hva() assuming writes.  Functionally,
+this allows x86 to create large mappings for read-only memslots that
+are backed by HugeTLB mappings.
+
+Note, the changelog for commit 05da45583de9 ("KVM: MMU: large page
+support") states "If the largepage contains write-protected pages, a
+large pte is not used.", but "write-protected" refers to pages that are
+temporarily read-only, e.g. read-only memslots didn't even exist at the
+time.
+
+Fixes: 4d8b81abc47b ("KVM: introduce readonly memslot")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
+[Redone using kvm_vcpu_gfn_to_memslot_prot. - Paolo]
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ virt/kvm/kvm_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
+index 877ce955b99cf..b5ea1bafe513c 100644
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -1401,7 +1401,7 @@ unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn)
+       size = PAGE_SIZE;
+-      addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
++      addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gfn, NULL);
+       if (kvm_is_error_hva(addr))
+               return PAGE_SIZE;
+-- 
+2.20.1
+
diff --git a/queue-5.4/kvm-use-vcpu-specific-gva-hva-translation-when-query.patch b/queue-5.4/kvm-use-vcpu-specific-gva-hva-translation-when-query.patch
new file mode 100644 (file)
index 0000000..2094cb9
--- /dev/null
@@ -0,0 +1,103 @@
+From 333f404a8192aabb67094c9ab213609235e2d135 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 8 Jan 2020 12:24:37 -0800
+Subject: KVM: Use vcpu-specific gva->hva translation when querying host page
+ size
+
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+
+[ Upstream commit f9b84e19221efc5f493156ee0329df3142085f28 ]
+
+Use kvm_vcpu_gfn_to_hva() when retrieving the host page size so that the
+correct set of memslots is used when handling x86 page faults in SMM.
+
+Fixes: 54bf36aac520 ("KVM: x86: use vcpu-specific functions to read/write/translate GFNs")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/kvm/book3s_xive_native.c | 2 +-
+ arch/x86/kvm/mmu.c                    | 6 +++---
+ include/linux/kvm_host.h              | 2 +-
+ virt/kvm/kvm_main.c                   | 4 ++--
+ 4 files changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
+index 5a3373e06e60b..235d57d6c205e 100644
+--- a/arch/powerpc/kvm/book3s_xive_native.c
++++ b/arch/powerpc/kvm/book3s_xive_native.c
+@@ -638,7 +638,7 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
+       srcu_idx = srcu_read_lock(&kvm->srcu);
+       gfn = gpa_to_gfn(kvm_eq.qaddr);
+-      page_size = kvm_host_page_size(kvm, gfn);
++      page_size = kvm_host_page_size(vcpu, gfn);
+       if (1ull << kvm_eq.qshift > page_size) {
+               srcu_read_unlock(&kvm->srcu, srcu_idx);
+               pr_warn("Incompatible host page size %lx!\n", page_size);
+diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
+index 46070da9e08f8..518100ea5ef47 100644
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -1286,12 +1286,12 @@ static bool mmu_gfn_lpage_is_disallowed(struct kvm_vcpu *vcpu, gfn_t gfn,
+       return __mmu_gfn_lpage_is_disallowed(gfn, level, slot);
+ }
+-static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
++static int host_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn)
+ {
+       unsigned long page_size;
+       int i, ret = 0;
+-      page_size = kvm_host_page_size(kvm, gfn);
++      page_size = kvm_host_page_size(vcpu, gfn);
+       for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
+               if (page_size >= KVM_HPAGE_SIZE(i))
+@@ -1341,7 +1341,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn,
+       if (unlikely(*force_pt_level))
+               return PT_PAGE_TABLE_LEVEL;
+-      host_level = host_mapping_level(vcpu->kvm, large_gfn);
++      host_level = host_mapping_level(vcpu, large_gfn);
+       if (host_level == PT_PAGE_TABLE_LEVEL)
+               return host_level;
+diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
+index 7d1564c408972..b81f0f1ded5f9 100644
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -751,7 +751,7 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
+ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
+ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
+ bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
+-unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
++unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn);
+ void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
+ struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
+diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
+index 6f0287ad927ee..877ce955b99cf 100644
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -1394,14 +1394,14 @@ bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
+ }
+ EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
+-unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn)
++unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn)
+ {
+       struct vm_area_struct *vma;
+       unsigned long addr, size;
+       size = PAGE_SIZE;
+-      addr = gfn_to_hva(kvm, gfn);
++      addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
+       if (kvm_is_error_hva(addr))
+               return PAGE_SIZE;
+-- 
+2.20.1
+
diff --git a/queue-5.4/kvm-x86-fix-overlap-between-spte_mmio_mask-and-gener.patch b/queue-5.4/kvm-x86-fix-overlap-between-spte_mmio_mask-and-gener.patch
new file mode 100644 (file)
index 0000000..bd9f083
--- /dev/null
@@ -0,0 +1,72 @@
+From f9b490c807854b02b1af757256fe41c95cb0859d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 18 Jan 2020 20:09:03 +0100
+Subject: KVM: x86: fix overlap between SPTE_MMIO_MASK and generation
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+[ Upstream commit 56871d444bc4d7ea66708775e62e2e0926384dbc ]
+
+The SPTE_MMIO_MASK overlaps with the bits used to track MMIO
+generation number.  A high enough generation number would overwrite the
+SPTE_SPECIAL_MASK region and cause the MMIO SPTE to be misinterpreted.
+
+Likewise, setting bits 52 and 53 would also cause an incorrect generation
+number to be read from the PTE, though this was partially mitigated by the
+(useless if it weren't for the bug) removal of SPTE_SPECIAL_MASK from
+the spte in get_mmio_spte_generation.  Drop that removal, and replace
+it with a compile-time assertion.
+
+Fixes: 6eeb4ef049e7 ("KVM: x86: assign two bits to track SPTE kinds")
+Reported-by: Ben Gardon <bgardon@google.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
+index d7aa34bb318a5..46070da9e08f8 100644
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -418,22 +418,24 @@ static inline bool is_access_track_spte(u64 spte)
+  * requires a full MMU zap).  The flag is instead explicitly queried when
+  * checking for MMIO spte cache hits.
+  */
+-#define MMIO_SPTE_GEN_MASK            GENMASK_ULL(18, 0)
++#define MMIO_SPTE_GEN_MASK            GENMASK_ULL(17, 0)
+ #define MMIO_SPTE_GEN_LOW_START               3
+ #define MMIO_SPTE_GEN_LOW_END         11
+ #define MMIO_SPTE_GEN_LOW_MASK                GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
+                                                   MMIO_SPTE_GEN_LOW_START)
+-#define MMIO_SPTE_GEN_HIGH_START      52
+-#define MMIO_SPTE_GEN_HIGH_END                61
++#define MMIO_SPTE_GEN_HIGH_START      PT64_SECOND_AVAIL_BITS_SHIFT
++#define MMIO_SPTE_GEN_HIGH_END                62
+ #define MMIO_SPTE_GEN_HIGH_MASK               GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \
+                                                   MMIO_SPTE_GEN_HIGH_START)
++
+ static u64 generation_mmio_spte_mask(u64 gen)
+ {
+       u64 mask;
+       WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
++      BUILD_BUG_ON((MMIO_SPTE_GEN_HIGH_MASK | MMIO_SPTE_GEN_LOW_MASK) & SPTE_SPECIAL_MASK);
+       mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK;
+       mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK;
+@@ -444,8 +446,6 @@ static u64 get_mmio_spte_generation(u64 spte)
+ {
+       u64 gen;
+-      spte &= ~shadow_mmio_mask;
+-
+       gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START;
+       gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START;
+       return gen;
+-- 
+2.20.1
+
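To make the layout constraint concrete, here is a self-contained user-space sketch (not kernel code; the bit positions and macro names are illustrative stand-ins, with the high range deliberately placed above the "special" bits): a generation counter is split across two disjoint bit ranges of a 64-bit word, a compile-time assertion rejects any overlap with the special bits, and a pack/unpack round trip shows the split.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define GENMASK_ULL(h, l)  (((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

#define GEN_LOW_START   3
#define GEN_LOW_END     11
#define GEN_LOW_BITS    (GEN_LOW_END - GEN_LOW_START + 1)
#define GEN_HIGH_START  54                  /* illustrative: above the special bits */
#define GEN_HIGH_END    62
#define SPECIAL_MASK    GENMASK_ULL(53, 52) /* stand-in for SPTE_SPECIAL_MASK */

#define GEN_LOW_MASK    GENMASK_ULL(GEN_LOW_END, GEN_LOW_START)
#define GEN_HIGH_MASK   GENMASK_ULL(GEN_HIGH_END, GEN_HIGH_START)

/* The overlap the fix guards against, caught at compile time. */
static_assert(((GEN_LOW_MASK | GEN_HIGH_MASK) & SPECIAL_MASK) == 0,
              "generation bits must not overlap the special bits");

static uint64_t pack_gen(uint64_t gen)
{
        uint64_t spte = 0;

        spte |= (gen << GEN_LOW_START) & GEN_LOW_MASK;                    /* gen[8:0]  */
        spte |= (gen << (GEN_HIGH_START - GEN_LOW_BITS)) & GEN_HIGH_MASK; /* gen[17:9] */
        return spte;
}

static uint64_t unpack_gen(uint64_t spte)
{
        uint64_t gen;

        gen  = (spte & GEN_LOW_MASK) >> GEN_LOW_START;
        gen |= ((spte & GEN_HIGH_MASK) >> GEN_HIGH_START) << GEN_LOW_BITS;
        return gen;
}

int main(void)
{
        uint64_t gen = 0x2ABCD;   /* an 18-bit value */

        assert(unpack_gen(pack_gen(gen)) == gen);
        printf("generation %#llx round-trips cleanly\n", (unsigned long long)gen);
        return 0;
}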
diff --git a/queue-5.4/kvm-x86-mmu-apply-max-pa-check-for-mmio-sptes-to-32-.patch b/queue-5.4/kvm-x86-mmu-apply-max-pa-check-for-mmio-sptes-to-32-.patch
new file mode 100644 (file)
index 0000000..9c7e931
--- /dev/null
@@ -0,0 +1,46 @@
+From d2ec8bee9b100de0eef44cfa4073f0e623432fc0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 Jan 2020 16:12:10 -0800
+Subject: KVM: x86/mmu: Apply max PA check for MMIO sptes to 32-bit KVM
+
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+
+[ Upstream commit e30a7d623dccdb3f880fbcad980b0cb589a1da45 ]
+
+Remove the bogus 64-bit only condition from the check that disables MMIO
+spte optimization when the system supports the max PA, i.e. doesn't have
+any reserved PA bits.  32-bit KVM always uses PAE paging for the shadow
+MMU, and per Intel's SDM:
+
+  PAE paging translates 32-bit linear addresses to 52-bit physical
+  addresses.
+
+The kernel's restrictions on max physical addresses are limits on how
+much memory the kernel can reasonably use, not what physical addresses
+are supported by hardware.
+
+Fixes: ce88decffd17 ("KVM: MMU: mmio page fault support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
+index 2ce9da58611ed..3644ac215567b 100644
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -6249,7 +6249,7 @@ static void kvm_set_mmio_spte_mask(void)
+        * If reserved bit is not supported, clear the present bit to disable
+        * mmio page fault.
+        */
+-      if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52)
++      if (shadow_phys_bits == 52)
+               mask &= ~1ull;
+       kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
+-- 
+2.20.1
+
diff --git a/queue-5.4/kvm-x86-use-cpuid-to-locate-host-page-table-reserved.patch b/queue-5.4/kvm-x86-use-cpuid-to-locate-host-page-table-reserved.patch
new file mode 100644 (file)
index 0000000..f0a0fb1
--- /dev/null
@@ -0,0 +1,57 @@
+From d2978d773ea6b7bc495fe4ce1924070794a58db8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Dec 2019 15:50:27 +0100
+Subject: KVM: x86: use CPUID to locate host page table reserved bits
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+[ Upstream commit 7adacf5eb2d2048045d9fd8fdab861fd9e7e2e96 ]
+
+The comment in kvm_get_shadow_phys_bits refers to MKTME, but the same is actually
+true of SME and SEV.  Just use CPUID[0x8000_0008].EAX[7:0] unconditionally if
+available; it is simplest and works even if memory is not encrypted.
+
+Cc: stable@vger.kernel.org
+Reported-by: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu.c | 20 ++++++++++++--------
+ 1 file changed, 12 insertions(+), 8 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
+index 3644ac215567b..d05c10651398f 100644
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -538,16 +538,20 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
+ static u8 kvm_get_shadow_phys_bits(void)
+ {
+       /*
+-       * boot_cpu_data.x86_phys_bits is reduced when MKTME is detected
+-       * in CPU detection code, but MKTME treats those reduced bits as
+-       * 'keyID' thus they are not reserved bits. Therefore for MKTME
+-       * we should still return physical address bits reported by CPUID.
++       * boot_cpu_data.x86_phys_bits is reduced when MKTME or SME are detected
++       * in CPU detection code, but the processor treats those reduced bits as
++       * 'keyID' thus they are not reserved bits. Therefore KVM needs to look at
++       * the physical address bits reported by CPUID.
+        */
+-      if (!boot_cpu_has(X86_FEATURE_TME) ||
+-          WARN_ON_ONCE(boot_cpu_data.extended_cpuid_level < 0x80000008))
+-              return boot_cpu_data.x86_phys_bits;
++      if (likely(boot_cpu_data.extended_cpuid_level >= 0x80000008))
++              return cpuid_eax(0x80000008) & 0xff;
+-      return cpuid_eax(0x80000008) & 0xff;
++      /*
++       * Quite weird to have VMX or SVM but not MAXPHYADDR; probably a VM with
++       * custom CPUID.  Proceed with whatever the kernel found since these features
++       * aren't virtualizable (SME/SEV also require CPUIDs higher than 0x80000008).
++       */
++      return boot_cpu_data.x86_phys_bits;
+ }
+ static void kvm_mmu_reset_all_pte_masks(void)
+-- 
+2.20.1
+
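For reference, a small user-space sketch (x86-only, using the compiler's cpuid.h helper; this is not KVM code) of the probe the patch above switches to: when extended leaf 0x8000_0008 exists, EAX[7:0] reports the processor's physical address width directly, independent of any key-ID bits that MKTME/SME caused the kernel to subtract from boot_cpu_data.x86_phys_bits.

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* __get_cpuid() returns 0 if the requested leaf is unsupported,
         * e.g. in a VM exposing a custom, truncated CPUID. */
        if (__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx))
                printf("MAXPHYADDR: %u bits\n", eax & 0xff);
        else
                printf("CPUID leaf 0x80000008 not available, fall back\n");

        return 0;
}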
diff --git a/queue-5.4/kvm-x86-use-gpa_t-for-cr2-gpa-to-fix-tdp-support-on-.patch b/queue-5.4/kvm-x86-use-gpa_t-for-cr2-gpa-to-fix-tdp-support-on-.patch
new file mode 100644 (file)
index 0000000..ccac174
--- /dev/null
@@ -0,0 +1,668 @@
+From 38f54b3ce187f6f145e3205911d4daa73e13649e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Dec 2019 15:57:14 -0800
+Subject: KVM: x86: Use gpa_t for cr2/gpa to fix TDP support on 32-bit KVM
+
+From: Sean Christopherson <sean.j.christopherson@intel.com>
+
+[ Upstream commit 736c291c9f36b07f8889c61764c28edce20e715d ]
+
+Convert a plethora of parameters and variables in the MMU and page fault
+flows from type gva_t to gpa_t to properly handle TDP on 32-bit KVM.
+
+Thanks to PSE and PAE paging, 32-bit kernels can access 64-bit physical
+addresses.  When TDP is enabled, the fault address is a guest physical
+address and thus can be a 64-bit value, even when both KVM and its guest
+are using 32-bit virtual addressing, e.g. VMX's VMCS.GUEST_PHYSICAL is a
+64-bit field, not a natural width field.
+
+Using a gva_t for the fault address means KVM will incorrectly drop the
+upper 32-bits of the GPA.  Ditto for gva_to_gpa() when it is used to
+translate L2 GPAs to L1 GPAs.
+
+Opportunistically rename variables and parameters to better reflect the
+dual address modes, e.g. use "cr2_or_gpa" for fault addresses and plain
+"addr" instead of "vaddr" when the address may be either a GVA or an L2
+GPA.  Similarly, use "gpa" in the nonpaging_page_fault() flows to avoid
+a confusing "gpa_t gva" declaration; this also sets the stage for a
+future patch to combine nonpaging_page_fault() and tdp_page_fault() with
+minimal churn.
+
+Sprinkle in a few comments to document flows where an address is known
+to be a GVA and thus can be safely truncated to a 32-bit value.  Add
+WARNs in kvm_handle_page_fault() and FNAME(gva_to_gpa_nested)() to help
+document such cases and detect bugs.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/kvm_host.h |  8 ++--
+ arch/x86/kvm/mmu.c              | 69 +++++++++++++++++++--------------
+ arch/x86/kvm/mmutrace.h         | 12 +++---
+ arch/x86/kvm/paging_tmpl.h      | 25 +++++++-----
+ arch/x86/kvm/x86.c              | 40 +++++++++----------
+ arch/x86/kvm/x86.h              |  2 +-
+ include/linux/kvm_host.h        |  6 +--
+ virt/kvm/async_pf.c             | 10 ++---
+ 8 files changed, 94 insertions(+), 78 deletions(-)
+
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index 10434cf402dee..c1ed054c103c3 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -380,12 +380,12 @@ struct kvm_mmu {
+       void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
+       unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
+       u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
+-      int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err,
++      int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err,
+                         bool prefault);
+       void (*inject_page_fault)(struct kvm_vcpu *vcpu,
+                                 struct x86_exception *fault);
+-      gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
+-                          struct x86_exception *exception);
++      gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t gva_or_gpa,
++                          u32 access, struct x86_exception *exception);
+       gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
+                              struct x86_exception *exception);
+       int (*sync_page)(struct kvm_vcpu *vcpu,
+@@ -1451,7 +1451,7 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu);
+ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
+-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
++int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
+                      void *insn, int insn_len);
+ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
+ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
+diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
+index d05c10651398f..d7aa34bb318a5 100644
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -3532,7 +3532,7 @@ static bool is_access_allowed(u32 fault_err_code, u64 spte)
+  * - true: let the vcpu to access on the same address again.
+  * - false: let the real page fault path to fix it.
+  */
+-static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
++static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int level,
+                           u32 error_code)
+ {
+       struct kvm_shadow_walk_iterator iterator;
+@@ -3552,7 +3552,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
+       do {
+               u64 new_spte;
+-              for_each_shadow_entry_lockless(vcpu, gva, iterator, spte)
++              for_each_shadow_entry_lockless(vcpu, cr2_or_gpa, iterator, spte)
+                       if (!is_shadow_present_pte(spte) ||
+                           iterator.level < level)
+                               break;
+@@ -3630,7 +3630,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
+       } while (true);
+-      trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep,
++      trace_fast_page_fault(vcpu, cr2_or_gpa, error_code, iterator.sptep,
+                             spte, fault_handled);
+       walk_shadow_page_lockless_end(vcpu);
+@@ -3638,10 +3638,11 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
+ }
+ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
+-                       gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable);
++                       gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write,
++                       bool *writable);
+ static int make_mmu_pages_available(struct kvm_vcpu *vcpu);
+-static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
++static int nonpaging_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
+                        gfn_t gfn, bool prefault)
+ {
+       int r;
+@@ -3667,16 +3668,16 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
+               gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
+       }
+-      if (fast_page_fault(vcpu, v, level, error_code))
++      if (fast_page_fault(vcpu, gpa, level, error_code))
+               return RET_PF_RETRY;
+       mmu_seq = vcpu->kvm->mmu_notifier_seq;
+       smp_rmb();
+-      if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable))
++      if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
+               return RET_PF_RETRY;
+-      if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
++      if (handle_abnormal_pfn(vcpu, gpa, gfn, pfn, ACC_ALL, &r))
+               return r;
+       r = RET_PF_RETRY;
+@@ -3687,7 +3688,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
+               goto out_unlock;
+       if (likely(!force_pt_level))
+               transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
+-      r = __direct_map(vcpu, v, write, map_writable, level, pfn,
++      r = __direct_map(vcpu, gpa, write, map_writable, level, pfn,
+                        prefault, false);
+ out_unlock:
+       spin_unlock(&vcpu->kvm->mmu_lock);
+@@ -3985,7 +3986,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
+ }
+ EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots);
+-static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
++static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gpa_t vaddr,
+                                 u32 access, struct x86_exception *exception)
+ {
+       if (exception)
+@@ -3993,7 +3994,7 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
+       return vaddr;
+ }
+-static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
++static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gpa_t vaddr,
+                                        u32 access,
+                                        struct x86_exception *exception)
+ {
+@@ -4153,13 +4154,14 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
+       walk_shadow_page_lockless_end(vcpu);
+ }
+-static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
++static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa,
+                               u32 error_code, bool prefault)
+ {
+-      gfn_t gfn = gva >> PAGE_SHIFT;
++      gfn_t gfn = gpa >> PAGE_SHIFT;
+       int r;
+-      pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
++      /* Note, paging is disabled, ergo gva == gpa. */
++      pgprintk("%s: gva %lx error %x\n", __func__, gpa, error_code);
+       if (page_fault_handle_page_track(vcpu, error_code, gfn))
+               return RET_PF_EMULATE;
+@@ -4171,11 +4173,12 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
+       MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa));
+-      return nonpaging_map(vcpu, gva & PAGE_MASK,
++      return nonpaging_map(vcpu, gpa & PAGE_MASK,
+                            error_code, gfn, prefault);
+ }
+-static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
++static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
++                                 gfn_t gfn)
+ {
+       struct kvm_arch_async_pf arch;
+@@ -4184,11 +4187,13 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
+       arch.direct_map = vcpu->arch.mmu->direct_map;
+       arch.cr3 = vcpu->arch.mmu->get_cr3(vcpu);
+-      return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
++      return kvm_setup_async_pf(vcpu, cr2_or_gpa,
++                                kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
+ }
+ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
+-                       gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable)
++                       gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write,
++                       bool *writable)
+ {
+       struct kvm_memory_slot *slot;
+       bool async;
+@@ -4208,12 +4213,12 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
+               return false; /* *pfn has correct page already */
+       if (!prefault && kvm_can_do_async_pf(vcpu)) {
+-              trace_kvm_try_async_get_page(gva, gfn);
++              trace_kvm_try_async_get_page(cr2_or_gpa, gfn);
+               if (kvm_find_async_pf_gfn(vcpu, gfn)) {
+-                      trace_kvm_async_pf_doublefault(gva, gfn);
++                      trace_kvm_async_pf_doublefault(cr2_or_gpa, gfn);
+                       kvm_make_request(KVM_REQ_APF_HALT, vcpu);
+                       return true;
+-              } else if (kvm_arch_setup_async_pf(vcpu, gva, gfn))
++              } else if (kvm_arch_setup_async_pf(vcpu, cr2_or_gpa, gfn))
+                       return true;
+       }
+@@ -4226,6 +4231,12 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
+ {
+       int r = 1;
++#ifndef CONFIG_X86_64
++      /* A 64-bit CR2 should be impossible on 32-bit KVM. */
++      if (WARN_ON_ONCE(fault_address >> 32))
++              return -EFAULT;
++#endif
++
+       vcpu->arch.l1tf_flush_l1d = true;
+       switch (vcpu->arch.apf.host_apf_reason) {
+       default:
+@@ -4263,7 +4274,7 @@ check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
+       return kvm_mtrr_check_gfn_range_consistency(vcpu, gfn, page_num);
+ }
+-static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
++static int tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
+                         bool prefault)
+ {
+       kvm_pfn_t pfn;
+@@ -5520,7 +5531,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
+       return 0;
+ }
+-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
++int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
+                      void *insn, int insn_len)
+ {
+       int r, emulation_type = 0;
+@@ -5529,18 +5540,18 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
+       /* With shadow page tables, fault_address contains a GVA or nGPA.  */
+       if (vcpu->arch.mmu->direct_map) {
+               vcpu->arch.gpa_available = true;
+-              vcpu->arch.gpa_val = cr2;
++              vcpu->arch.gpa_val = cr2_or_gpa;
+       }
+       r = RET_PF_INVALID;
+       if (unlikely(error_code & PFERR_RSVD_MASK)) {
+-              r = handle_mmio_page_fault(vcpu, cr2, direct);
++              r = handle_mmio_page_fault(vcpu, cr2_or_gpa, direct);
+               if (r == RET_PF_EMULATE)
+                       goto emulate;
+       }
+       if (r == RET_PF_INVALID) {
+-              r = vcpu->arch.mmu->page_fault(vcpu, cr2,
++              r = vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa,
+                                              lower_32_bits(error_code),
+                                              false);
+               WARN_ON(r == RET_PF_INVALID);
+@@ -5560,7 +5571,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
+        */
+       if (vcpu->arch.mmu->direct_map &&
+           (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
+-              kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2));
++              kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa));
+               return 1;
+       }
+@@ -5575,7 +5586,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
+        * explicitly shadowing L1's page tables, i.e. unprotecting something
+        * for L1 isn't going to magically fix whatever issue cause L2 to fail.
+        */
+-      if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu))
++      if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu))
+               emulation_type = EMULTYPE_ALLOW_RETRY;
+ emulate:
+       /*
+@@ -5590,7 +5601,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
+                       return 1;
+       }
+-      return x86_emulate_instruction(vcpu, cr2, emulation_type, insn,
++      return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn,
+                                      insn_len);
+ }
+ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
+diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
+index 7ca8831c7d1a2..3c6522b84ff11 100644
+--- a/arch/x86/kvm/mmutrace.h
++++ b/arch/x86/kvm/mmutrace.h
+@@ -249,13 +249,13 @@ TRACE_EVENT(
+ TRACE_EVENT(
+       fast_page_fault,
+-      TP_PROTO(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
++      TP_PROTO(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 error_code,
+                u64 *sptep, u64 old_spte, bool retry),
+-      TP_ARGS(vcpu, gva, error_code, sptep, old_spte, retry),
++      TP_ARGS(vcpu, cr2_or_gpa, error_code, sptep, old_spte, retry),
+       TP_STRUCT__entry(
+               __field(int, vcpu_id)
+-              __field(gva_t, gva)
++              __field(gpa_t, cr2_or_gpa)
+               __field(u32, error_code)
+               __field(u64 *, sptep)
+               __field(u64, old_spte)
+@@ -265,7 +265,7 @@ TRACE_EVENT(
+       TP_fast_assign(
+               __entry->vcpu_id = vcpu->vcpu_id;
+-              __entry->gva = gva;
++              __entry->cr2_or_gpa = cr2_or_gpa;
+               __entry->error_code = error_code;
+               __entry->sptep = sptep;
+               __entry->old_spte = old_spte;
+@@ -273,9 +273,9 @@ TRACE_EVENT(
+               __entry->retry = retry;
+       ),
+-      TP_printk("vcpu %d gva %lx error_code %s sptep %p old %#llx"
++      TP_printk("vcpu %d gva %llx error_code %s sptep %p old %#llx"
+                 " new %llx spurious %d fixed %d", __entry->vcpu_id,
+-                __entry->gva, __print_flags(__entry->error_code, "|",
++                __entry->cr2_or_gpa, __print_flags(__entry->error_code, "|",
+                 kvm_mmu_trace_pferr_flags), __entry->sptep,
+                 __entry->old_spte, __entry->new_spte,
+                 __spte_satisfied(old_spte), __spte_satisfied(new_spte)
+diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
+index 97b21e7fd013d..c1d7b866a03fa 100644
+--- a/arch/x86/kvm/paging_tmpl.h
++++ b/arch/x86/kvm/paging_tmpl.h
+@@ -291,11 +291,11 @@ static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte)
+ }
+ /*
+- * Fetch a guest pte for a guest virtual address
++ * Fetch a guest pte for a guest virtual address, or for an L2's GPA.
+  */
+ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
+                                   struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+-                                  gva_t addr, u32 access)
++                                  gpa_t addr, u32 access)
+ {
+       int ret;
+       pt_element_t pte;
+@@ -496,7 +496,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
+ }
+ static int FNAME(walk_addr)(struct guest_walker *walker,
+-                          struct kvm_vcpu *vcpu, gva_t addr, u32 access)
++                          struct kvm_vcpu *vcpu, gpa_t addr, u32 access)
+ {
+       return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr,
+                                       access);
+@@ -611,7 +611,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
+  * If the guest tries to write a write-protected page, we need to
+  * emulate this operation, return 1 to indicate this case.
+  */
+-static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
++static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
+                        struct guest_walker *gw,
+                        int write_fault, int hlevel,
+                        kvm_pfn_t pfn, bool map_writable, bool prefault,
+@@ -765,7 +765,7 @@ FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
+  *  Returns: 1 if we need to emulate the instruction, 0 otherwise, or
+  *           a negative value on error.
+  */
+-static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
++static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
+                            bool prefault)
+ {
+       int write_fault = error_code & PFERR_WRITE_MASK;
+@@ -945,18 +945,19 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
+       spin_unlock(&vcpu->kvm->mmu_lock);
+ }
+-static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
++/* Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA. */
++static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t addr, u32 access,
+                              struct x86_exception *exception)
+ {
+       struct guest_walker walker;
+       gpa_t gpa = UNMAPPED_GVA;
+       int r;
+-      r = FNAME(walk_addr)(&walker, vcpu, vaddr, access);
++      r = FNAME(walk_addr)(&walker, vcpu, addr, access);
+       if (r) {
+               gpa = gfn_to_gpa(walker.gfn);
+-              gpa |= vaddr & ~PAGE_MASK;
++              gpa |= addr & ~PAGE_MASK;
+       } else if (exception)
+               *exception = walker.fault;
+@@ -964,7 +965,8 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
+ }
+ #if PTTYPE != PTTYPE_EPT
+-static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
++/* Note, gva_to_gpa_nested() is only used to translate L2 GVAs. */
++static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gpa_t vaddr,
+                                     u32 access,
+                                     struct x86_exception *exception)
+ {
+@@ -972,6 +974,11 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
+       gpa_t gpa = UNMAPPED_GVA;
+       int r;
++#ifndef CONFIG_X86_64
++      /* A 64-bit GVA should be impossible on 32-bit KVM. */
++      WARN_ON_ONCE(vaddr >> 32);
++#endif
++
+       r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, access);
+       if (r) {
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 92e025d1b5175..edde5ee8c6f50 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -6409,11 +6409,11 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
+       return 1;
+ }
+-static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
++static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+                                 bool write_fault_to_shadow_pgtable,
+                                 int emulation_type)
+ {
+-      gpa_t gpa = cr2;
++      gpa_t gpa = cr2_or_gpa;
+       kvm_pfn_t pfn;
+       if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
+@@ -6427,7 +6427,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
+                * Write permission should be allowed since only
+                * write access need to be emulated.
+                */
+-              gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
++              gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);
+               /*
+                * If the mapping is invalid in guest, let cpu retry
+@@ -6484,10 +6484,10 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
+ }
+ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
+-                            unsigned long cr2,  int emulation_type)
++                            gpa_t cr2_or_gpa,  int emulation_type)
+ {
+       struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+-      unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
++      unsigned long last_retry_eip, last_retry_addr, gpa = cr2_or_gpa;
+       last_retry_eip = vcpu->arch.last_retry_eip;
+       last_retry_addr = vcpu->arch.last_retry_addr;
+@@ -6516,14 +6516,14 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
+       if (x86_page_table_writing_insn(ctxt))
+               return false;
+-      if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
++      if (ctxt->eip == last_retry_eip && last_retry_addr == cr2_or_gpa)
+               return false;
+       vcpu->arch.last_retry_eip = ctxt->eip;
+-      vcpu->arch.last_retry_addr = cr2;
++      vcpu->arch.last_retry_addr = cr2_or_gpa;
+       if (!vcpu->arch.mmu->direct_map)
+-              gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
++              gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);
+       kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
+@@ -6669,11 +6669,8 @@ static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt)
+       return false;
+ }
+-int x86_emulate_instruction(struct kvm_vcpu *vcpu,
+-                          unsigned long cr2,
+-                          int emulation_type,
+-                          void *insn,
+-                          int insn_len)
++int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
++                          int emulation_type, void *insn, int insn_len)
+ {
+       int r;
+       struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
+@@ -6719,8 +6716,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
+                               kvm_queue_exception(vcpu, UD_VECTOR);
+                               return 1;
+                       }
+-                      if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
+-                                              emulation_type))
++                      if (reexecute_instruction(vcpu, cr2_or_gpa,
++                                                write_fault_to_spt,
++                                                emulation_type))
+                               return 1;
+                       if (ctxt->have_exception) {
+                               /*
+@@ -6754,7 +6752,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
+               return 1;
+       }
+-      if (retry_instruction(ctxt, cr2, emulation_type))
++      if (retry_instruction(ctxt, cr2_or_gpa, emulation_type))
+               return 1;
+       /* this is needed for vmware backdoor interface to work since it
+@@ -6766,7 +6764,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
+ restart:
+       /* Save the faulting GPA (cr2) in the address field */
+-      ctxt->exception.address = cr2;
++      ctxt->exception.address = cr2_or_gpa;
+       r = x86_emulate_insn(ctxt);
+@@ -6774,7 +6772,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
+               return 1;
+       if (r == EMULATION_FAILED) {
+-              if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
++              if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt,
+                                       emulation_type))
+                       return 1;
+@@ -10051,7 +10049,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
+             work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu))
+               return;
+-      vcpu->arch.mmu->page_fault(vcpu, work->gva, 0, true);
++      vcpu->arch.mmu->page_fault(vcpu, work->cr2_or_gpa, 0, true);
+ }
+ static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
+@@ -10164,7 +10162,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+ {
+       struct x86_exception fault;
+-      trace_kvm_async_pf_not_present(work->arch.token, work->gva);
++      trace_kvm_async_pf_not_present(work->arch.token, work->cr2_or_gpa);
+       kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
+       if (kvm_can_deliver_async_pf(vcpu) &&
+@@ -10199,7 +10197,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+               work->arch.token = ~0; /* broadcast wakeup */
+       else
+               kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
+-      trace_kvm_async_pf_ready(work->arch.token, work->gva);
++      trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa);
+       if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED &&
+           !apf_get_user(vcpu, &val)) {
+diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
+index dbf7442a822b6..de6b55484876a 100644
+--- a/arch/x86/kvm/x86.h
++++ b/arch/x86/kvm/x86.h
+@@ -286,7 +286,7 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
+ bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
+                                         int page_num);
+ bool kvm_vector_hashing_enabled(void);
+-int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
++int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+                           int emulation_type, void *insn, int insn_len);
+ #define KVM_SUPPORTED_XCR0     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
+diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
+index abfc2fbde957c..7d1564c408972 100644
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -204,7 +204,7 @@ struct kvm_async_pf {
+       struct list_head queue;
+       struct kvm_vcpu *vcpu;
+       struct mm_struct *mm;
+-      gva_t gva;
++      gpa_t cr2_or_gpa;
+       unsigned long addr;
+       struct kvm_arch_async_pf arch;
+       bool   wakeup_all;
+@@ -212,8 +212,8 @@ struct kvm_async_pf {
+ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
+ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu);
+-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
+-                     struct kvm_arch_async_pf *arch);
++int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
++                     unsigned long hva, struct kvm_arch_async_pf *arch);
+ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
+ #endif
+diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
+index 35305d6e68cc6..d8ef708a2ef67 100644
+--- a/virt/kvm/async_pf.c
++++ b/virt/kvm/async_pf.c
+@@ -64,7 +64,7 @@ static void async_pf_execute(struct work_struct *work)
+       struct mm_struct *mm = apf->mm;
+       struct kvm_vcpu *vcpu = apf->vcpu;
+       unsigned long addr = apf->addr;
+-      gva_t gva = apf->gva;
++      gpa_t cr2_or_gpa = apf->cr2_or_gpa;
+       int locked = 1;
+       might_sleep();
+@@ -92,7 +92,7 @@ static void async_pf_execute(struct work_struct *work)
+        * this point
+        */
+-      trace_kvm_async_pf_completed(addr, gva);
++      trace_kvm_async_pf_completed(addr, cr2_or_gpa);
+       if (swq_has_sleeper(&vcpu->wq))
+               swake_up_one(&vcpu->wq);
+@@ -165,8 +165,8 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
+       }
+ }
+-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
+-                     struct kvm_arch_async_pf *arch)
++int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
++                     unsigned long hva, struct kvm_arch_async_pf *arch)
+ {
+       struct kvm_async_pf *work;
+@@ -185,7 +185,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
+       work->wakeup_all = false;
+       work->vcpu = vcpu;
+-      work->gva = gva;
++      work->cr2_or_gpa = cr2_or_gpa;
+       work->addr = hva;
+       work->arch = *arch;
+       work->mm = current->mm;
+-- 
+2.20.1
+
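A stand-alone illustration of the truncation the patch above fixes (the typedefs are stand-ins for gva_t/gpa_t, not the kernel's definitions): on a 32-bit build an unsigned-long-sized fault address holds only 32 bits, so a guest physical address above 4 GiB, which PAE or TDP can legitimately produce, silently loses its upper half.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t narrow_addr;   /* like gva_t on a 32-bit kernel */
typedef uint64_t wide_addr;     /* like gpa_t, always 64 bits    */

int main(void)
{
        wide_addr   gpa       = 0x1234567890ULL;       /* > 4 GiB         */
        narrow_addr truncated = (narrow_addr)gpa;      /* upper bits lost */

        printf("gpa       = %#" PRIx64 "\n", gpa);
        printf("truncated = %#" PRIx32 "\n", truncated);
        return 0;
}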
diff --git a/queue-5.4/series b/queue-5.4/series
index eed9b9f928fdb44e0b0f8cdd52d6de32cbf539c1..cac44e1c9e1af266aa79aebb7f9b4381d9e02eb2 100644 (file)
@@ -289,3 +289,13 @@ mfd-da9062-fix-watchdog-compatible-string.patch
 mfd-rn5t618-mark-adc-control-register-volatile.patch
 mfd-bd70528-fix-hour-register-mask.patch
 x86-timer-don-t-skip-pit-setup-when-apic-is-disabled-or-in-legacy-mode.patch
+btrfs-use-bool-argument-in-free_root_pointers.patch
+btrfs-free-block-groups-after-free-ing-fs-trees.patch
+drm-dp_mst-remove-vcpi-while-disabling-topology-mgr.patch
+kvm-x86-mmu-apply-max-pa-check-for-mmio-sptes-to-32-.patch
+kvm-x86-use-cpuid-to-locate-host-page-table-reserved.patch
+kvm-x86-use-gpa_t-for-cr2-gpa-to-fix-tdp-support-on-.patch
+kvm-x86-fix-overlap-between-spte_mmio_mask-and-gener.patch
+kvm-nvmx-vmread-should-not-set-rflags-to-specify-suc.patch
+kvm-use-vcpu-specific-gva-hva-translation-when-query.patch
+kvm-play-nice-with-read-only-memslots-when-querying-.patch