From: Sasha Levin Date: Sat, 19 Nov 2022 17:24:33 +0000 (-0500) Subject: Fixes for 6.0 X-Git-Tag: v4.19.266~53 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=145065f7985a55793159800c02cb87788b18a148;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.0 Signed-off-by: Sasha Levin --- diff --git a/queue-6.0/arm64-fix-rodata-full-again.patch b/queue-6.0/arm64-fix-rodata-full-again.patch new file mode 100644 index 00000000000..20a2b5d3287 --- /dev/null +++ b/queue-6.0/arm64-fix-rodata-full-again.patch @@ -0,0 +1,61 @@ +From f7c7c8e06c1329617dab170683bfc2069acf4c8a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 3 Nov 2022 18:00:15 +0100 +Subject: arm64: fix rodata=full again + +From: Ard Biesheuvel + +[ Upstream commit 2081b3bd0c11757725dcab9ba5d38e1bddb03459 ] + +Commit 2e8cff0a0eee87b2 ("arm64: fix rodata=full") addressed a couple of +issues with the rodata= kernel command line option, which is not a +simple boolean on arm64, and inadvertently got broken due to changes in +the generic bool handling. + +Unfortunately, the resulting code never clears the rodata_full boolean +variable if it defaults to true and rodata=on or rodata=off is passed, +as the generic code is not aware of the existence of this variable. + +Given the way this code is plumbed together, clearing rodata_full when +returning false from arch_parse_debug_rodata() may result in +inconsistencies if the generic code decides that it cannot parse the +right hand side, so the best way to deal with this is to only take +rodata_full in account if rodata_enabled is also true. + +Fixes: 2e8cff0a0eee ("arm64: fix rodata=full") +Cc: # 6.0.x +Signed-off-by: Ard Biesheuvel +Acked-by: Will Deacon +Link: https://lore.kernel.org/r/20221103170015.4124426-1-ardb@kernel.org +Signed-off-by: Catalin Marinas +Signed-off-by: Sasha Levin +--- + arch/arm64/mm/pageattr.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c +index d107c3d434e2..5922178d7a06 100644 +--- a/arch/arm64/mm/pageattr.c ++++ b/arch/arm64/mm/pageattr.c +@@ -26,7 +26,7 @@ bool can_set_direct_map(void) + * mapped at page granularity, so that it is possible to + * protect/unprotect single pages. + */ +- return rodata_full || debug_pagealloc_enabled() || ++ return (rodata_enabled && rodata_full) || debug_pagealloc_enabled() || + IS_ENABLED(CONFIG_KFENCE); + } + +@@ -102,7 +102,8 @@ static int change_memory_common(unsigned long addr, int numpages, + * If we are manipulating read-only permissions, apply the same + * change to the linear mapping of the pages that back this VM area. 
+ */ +- if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY || ++ if (rodata_enabled && ++ rodata_full && (pgprot_val(set_mask) == PTE_RDONLY || + pgprot_val(clear_mask) == PTE_RDONLY)) { + for (i = 0; i < area->nr_pages; i++) { + __change_memory_common((u64)page_address(area->pages[i]), +-- +2.35.1 + diff --git a/queue-6.0/arm64-mm-fold-check-for-kfence-into-can_set_direct_m.patch b/queue-6.0/arm64-mm-fold-check-for-kfence-into-can_set_direct_m.patch new file mode 100644 index 00000000000..3921ad6dc32 --- /dev/null +++ b/queue-6.0/arm64-mm-fold-check-for-kfence-into-can_set_direct_m.patch @@ -0,0 +1,81 @@ +From 829c8c4d6a38d2e384016170c505d8ecef5815a0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Sep 2022 10:48:41 +0300 +Subject: arm64/mm: fold check for KFENCE into can_set_direct_map() + +From: Mike Rapoport + +[ Upstream commit b9dd04a20f81333e4b99662f1bbaf7c9e3a1e137 ] + +KFENCE requires linear map to be mapped at page granularity, so that it +is possible to protect/unprotect single pages, just like with +rodata_full and DEBUG_PAGEALLOC. + +Instead of repating + + can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE) + +make can_set_direct_map() handle the KFENCE case. + +This also prevents potential false positives in kernel_page_present() +that may return true for non-present page if CONFIG_KFENCE is enabled. + +Signed-off-by: Mike Rapoport +Reviewed-by: Anshuman Khandual +Link: https://lore.kernel.org/r/20220921074841.382615-1-rppt@kernel.org +Signed-off-by: Catalin Marinas +Stable-dep-of: 2081b3bd0c11 ("arm64: fix rodata=full again") +Signed-off-by: Sasha Levin +--- + arch/arm64/mm/mmu.c | 8 ++------ + arch/arm64/mm/pageattr.c | 8 +++++++- + 2 files changed, 9 insertions(+), 7 deletions(-) + +diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c +index eb489302c28a..e8de94dd5a60 100644 +--- a/arch/arm64/mm/mmu.c ++++ b/arch/arm64/mm/mmu.c +@@ -539,7 +539,7 @@ static void __init map_mem(pgd_t *pgdp) + */ + BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end)); + +- if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE)) ++ if (can_set_direct_map()) + flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + + /* +@@ -1551,11 +1551,7 @@ int arch_add_memory(int nid, u64 start, u64 size, + + VM_BUG_ON(!mhp_range_allowed(start, size, true)); + +- /* +- * KFENCE requires linear map to be mapped at page granularity, so that +- * it is possible to protect/unprotect single pages in the KFENCE pool. +- */ +- if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE)) ++ if (can_set_direct_map()) + flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + + __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), +diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c +index 64e985eaa52d..d107c3d434e2 100644 +--- a/arch/arm64/mm/pageattr.c ++++ b/arch/arm64/mm/pageattr.c +@@ -21,7 +21,13 @@ bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED + + bool can_set_direct_map(void) + { +- return rodata_full || debug_pagealloc_enabled(); ++ /* ++ * rodata_full, DEBUG_PAGEALLOC and KFENCE require linear map to be ++ * mapped at page granularity, so that it is possible to ++ * protect/unprotect single pages. 
++ */ ++ return rodata_full || debug_pagealloc_enabled() || ++ IS_ENABLED(CONFIG_KFENCE); + } + + static int change_page_range(pte_t *ptep, unsigned long addr, void *data) +-- +2.35.1 + diff --git a/queue-6.0/hugetlb-rename-remove_huge_page-to-hugetlb_delete_fr.patch b/queue-6.0/hugetlb-rename-remove_huge_page-to-hugetlb_delete_fr.patch new file mode 100644 index 00000000000..5f8ea6ce11b --- /dev/null +++ b/queue-6.0/hugetlb-rename-remove_huge_page-to-hugetlb_delete_fr.patch @@ -0,0 +1,155 @@ +From dc228ca9ad0cd11124b1e535d9c108affb272223 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 14 Sep 2022 15:18:04 -0700 +Subject: hugetlb: rename remove_huge_page to hugetlb_delete_from_page_cache + +From: Mike Kravetz + +[ Upstream commit 7e1813d48dd30e6c6f235f6661d1bc108fcab528 ] + +remove_huge_page removes a hugetlb page from the page cache. Change to +hugetlb_delete_from_page_cache as it is a more descriptive name. +huge_add_to_page_cache is global in scope, but only deals with hugetlb +pages. For consistency and clarity, rename to hugetlb_add_to_page_cache. + +Link: https://lkml.kernel.org/r/20220914221810.95771-4-mike.kravetz@oracle.com +Signed-off-by: Mike Kravetz +Reviewed-by: Miaohe Lin +Cc: Andrea Arcangeli +Cc: "Aneesh Kumar K.V" +Cc: Axel Rasmussen +Cc: David Hildenbrand +Cc: Davidlohr Bueso +Cc: James Houghton +Cc: "Kirill A. Shutemov" +Cc: Michal Hocko +Cc: Mina Almasry +Cc: Muchun Song +Cc: Naoya Horiguchi +Cc: Pasha Tatashin +Cc: Peter Xu +Cc: Prakash Sangappa +Cc: Sven Schnelle +Signed-off-by: Andrew Morton +Stable-dep-of: 8625147cafaa ("hugetlbfs: don't delete error page from pagecache") +Signed-off-by: Sasha Levin +--- + fs/hugetlbfs/inode.c | 21 ++++++++++----------- + include/linux/hugetlb.h | 2 +- + mm/hugetlb.c | 8 ++++---- + 3 files changed, 15 insertions(+), 16 deletions(-) + +diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c +index f7a5b5124d8a..b6406e7ab64b 100644 +--- a/fs/hugetlbfs/inode.c ++++ b/fs/hugetlbfs/inode.c +@@ -364,7 +364,7 @@ static int hugetlbfs_write_end(struct file *file, struct address_space *mapping, + return -EINVAL; + } + +-static void remove_huge_page(struct page *page) ++static void hugetlb_delete_from_page_cache(struct page *page) + { + ClearPageDirty(page); + ClearPageUptodate(page); +@@ -487,15 +487,14 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, + folio_lock(folio); + /* + * We must free the huge page and remove from page +- * cache (remove_huge_page) BEFORE removing the +- * region/reserve map (hugetlb_unreserve_pages). In +- * rare out of memory conditions, removal of the +- * region/reserve map could fail. Correspondingly, +- * the subpool and global reserve usage count can need +- * to be adjusted. ++ * cache BEFORE removing the region/reserve map ++ * (hugetlb_unreserve_pages). In rare out of memory ++ * conditions, removal of the region/reserve map could ++ * fail. Correspondingly, the subpool and global ++ * reserve usage count can need to be adjusted. 
+ */ + VM_BUG_ON(HPageRestoreReserve(&folio->page)); +- remove_huge_page(&folio->page); ++ hugetlb_delete_from_page_cache(&folio->page); + freed++; + if (!truncate_op) { + if (unlikely(hugetlb_unreserve_pages(inode, +@@ -737,7 +736,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, + } + clear_huge_page(page, addr, pages_per_huge_page(h)); + __SetPageUptodate(page); +- error = huge_add_to_page_cache(page, mapping, index); ++ error = hugetlb_add_to_page_cache(page, mapping, index); + if (unlikely(error)) { + restore_reserve_on_error(h, &pseudo_vma, addr, page); + put_page(page); +@@ -749,7 +748,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, + + SetHPageMigratable(page); + /* +- * unlock_page because locked by huge_add_to_page_cache() ++ * unlock_page because locked by hugetlb_add_to_page_cache() + * put_page() due to reference from alloc_huge_page() + */ + unlock_page(page); +@@ -994,7 +993,7 @@ static int hugetlbfs_error_remove_page(struct address_space *mapping, + struct inode *inode = mapping->host; + pgoff_t index = page->index; + +- remove_huge_page(page); ++ hugetlb_delete_from_page_cache(page); + if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1))) + hugetlb_fix_reserve_counts(inode); + +diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h +index 67c88b82fc32..53db3648207a 100644 +--- a/include/linux/hugetlb.h ++++ b/include/linux/hugetlb.h +@@ -665,7 +665,7 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, + nodemask_t *nmask, gfp_t gfp_mask); + struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, + unsigned long address); +-int huge_add_to_page_cache(struct page *page, struct address_space *mapping, ++int hugetlb_add_to_page_cache(struct page *page, struct address_space *mapping, + pgoff_t idx); + void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, + unsigned long address, struct page *page); +diff --git a/mm/hugetlb.c b/mm/hugetlb.c +index ecc197d24efb..5e414c90f82f 100644 +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -5445,7 +5445,7 @@ static bool hugetlbfs_pagecache_present(struct hstate *h, + return page != NULL; + } + +-int huge_add_to_page_cache(struct page *page, struct address_space *mapping, ++int hugetlb_add_to_page_cache(struct page *page, struct address_space *mapping, + pgoff_t idx) + { + struct folio *folio = page_folio(page); +@@ -5583,7 +5583,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, + new_page = true; + + if (vma->vm_flags & VM_MAYSHARE) { +- int err = huge_add_to_page_cache(page, mapping, idx); ++ int err = hugetlb_add_to_page_cache(page, mapping, idx); + if (err) { + put_page(page); + if (err == -EEXIST) +@@ -6008,11 +6008,11 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, + + /* + * Serialization between remove_inode_hugepages() and +- * huge_add_to_page_cache() below happens through the ++ * hugetlb_add_to_page_cache() below happens through the + * hugetlb_fault_mutex_table that here must be hold by + * the caller. 
+ */ +- ret = huge_add_to_page_cache(page, mapping, idx); ++ ret = hugetlb_add_to_page_cache(page, mapping, idx); + if (ret) + goto out_release_nounlock; + page_in_pagecache = true; +-- +2.35.1 + diff --git a/queue-6.0/hugetlbfs-don-t-delete-error-page-from-pagecache.patch b/queue-6.0/hugetlbfs-don-t-delete-error-page-from-pagecache.patch new file mode 100644 index 00000000000..92bfd9226fd --- /dev/null +++ b/queue-6.0/hugetlbfs-don-t-delete-error-page-from-pagecache.patch @@ -0,0 +1,123 @@ +From 111ecae8867badca78f721b3633ab2d59c94ed01 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Oct 2022 20:01:25 +0000 +Subject: hugetlbfs: don't delete error page from pagecache + +From: James Houghton + +[ Upstream commit 8625147cafaa9ba74713d682f5185eb62cb2aedb ] + +This change is very similar to the change that was made for shmem [1], and +it solves the same problem but for HugeTLBFS instead. + +Currently, when poison is found in a HugeTLB page, the page is removed +from the page cache. That means that attempting to map or read that +hugepage in the future will result in a new hugepage being allocated +instead of notifying the user that the page was poisoned. As [1] states, +this is effectively memory corruption. + +The fix is to leave the page in the page cache. If the user attempts to +use a poisoned HugeTLB page with a syscall, the syscall will fail with +EIO, the same error code that shmem uses. For attempts to map the page, +the thread will get a BUS_MCEERR_AR SIGBUS. + +[1]: commit a76054266661 ("mm: shmem: don't truncate page if memory failure happens") + +Link: https://lkml.kernel.org/r/20221018200125.848471-1-jthoughton@google.com +Signed-off-by: James Houghton +Reviewed-by: Mike Kravetz +Reviewed-by: Naoya Horiguchi +Tested-by: Naoya Horiguchi +Reviewed-by: Yang Shi +Cc: Axel Rasmussen +Cc: James Houghton +Cc: Miaohe Lin +Cc: Muchun Song +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + fs/hugetlbfs/inode.c | 13 ++++++------- + mm/hugetlb.c | 4 ++++ + mm/memory-failure.c | 5 ++++- + 3 files changed, 14 insertions(+), 8 deletions(-) + +diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c +index b6406e7ab64b..fbcfa6bfee80 100644 +--- a/fs/hugetlbfs/inode.c ++++ b/fs/hugetlbfs/inode.c +@@ -328,6 +328,12 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) + } else { + unlock_page(page); + ++ if (PageHWPoison(page)) { ++ put_page(page); ++ retval = -EIO; ++ break; ++ } ++ + /* + * We have the page, copy it to user space buffer. 
+ */ +@@ -990,13 +996,6 @@ static int hugetlbfs_migrate_folio(struct address_space *mapping, + static int hugetlbfs_error_remove_page(struct address_space *mapping, + struct page *page) + { +- struct inode *inode = mapping->host; +- pgoff_t index = page->index; +- +- hugetlb_delete_from_page_cache(page); +- if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1))) +- hugetlb_fix_reserve_counts(inode); +- + return 0; + } + +diff --git a/mm/hugetlb.c b/mm/hugetlb.c +index 5e414c90f82f..dbb558e71e9e 100644 +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -6021,6 +6021,10 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, + ptl = huge_pte_lockptr(h, dst_mm, dst_pte); + spin_lock(ptl); + ++ ret = -EIO; ++ if (PageHWPoison(page)) ++ goto out_release_unlock; ++ + /* + * Recheck the i_size after holding PT lock to make sure not + * to leave any page mapped (as page_mapped()) beyond the end +diff --git a/mm/memory-failure.c b/mm/memory-failure.c +index e7ac570dda75..4d302f6b02fc 100644 +--- a/mm/memory-failure.c ++++ b/mm/memory-failure.c +@@ -1079,6 +1079,7 @@ static int me_huge_page(struct page_state *ps, struct page *p) + int res; + struct page *hpage = compound_head(p); + struct address_space *mapping; ++ bool extra_pins = false; + + if (!PageHuge(hpage)) + return MF_DELAYED; +@@ -1086,6 +1087,8 @@ static int me_huge_page(struct page_state *ps, struct page *p) + mapping = page_mapping(hpage); + if (mapping) { + res = truncate_error_page(hpage, page_to_pfn(p), mapping); ++ /* The page is kept in page cache. */ ++ extra_pins = true; + unlock_page(hpage); + } else { + unlock_page(hpage); +@@ -1103,7 +1106,7 @@ static int me_huge_page(struct page_state *ps, struct page *p) + } + } + +- if (has_extra_refcount(ps, p, false)) ++ if (has_extra_refcount(ps, p, extra_pins)) + res = MF_FAILED; + + return res; +-- +2.35.1 + diff --git a/queue-6.0/kvm-svm-do-not-allocate-struct-svm_cpu_data-dynamica.patch b/queue-6.0/kvm-svm-do-not-allocate-struct-svm_cpu_data-dynamica.patch new file mode 100644 index 00000000000..7cd43354650 --- /dev/null +++ b/queue-6.0/kvm-svm-do-not-allocate-struct-svm_cpu_data-dynamica.patch @@ -0,0 +1,194 @@ +From a0a3bfa11ebc70ae398450478ae02e91622e420c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Nov 2022 09:07:55 -0500 +Subject: KVM: SVM: do not allocate struct svm_cpu_data dynamically + +From: Paolo Bonzini + +[ Upstream commit 73412dfeea724e6bd775ba64d21157ff322eac9a ] + +The svm_data percpu variable is a pointer, but it is allocated via +svm_hardware_setup() when KVM is loaded. Unlike hardware_enable() +this means that it is never NULL for the whole lifetime of KVM, and +static allocation does not waste any memory compared to the status quo. +It is also more efficient and more easily handled from assembly code, +so do it and don't look back. 
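+
+For illustration, a minimal sketch of the before/after access pattern,
+using the generic percpu accessors that the diff below switches to:
+
+    /* before: a per-CPU *pointer*, backing struct kzalloc'ed at setup */
+    DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
+    struct svm_cpu_data *sd = per_cpu(svm_data, cpu);      /* may be NULL */
+
+    /* after: the struct itself lives in the per-CPU area */
+    DEFINE_PER_CPU(struct svm_cpu_data, svm_data);
+    struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); /* never NULL */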
+ +Reviewed-by: Sean Christopherson +Signed-off-by: Paolo Bonzini +Stable-dep-of: e287bd005ad9 ("KVM: SVM: restore host save area from assembly") +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/svm/sev.c | 4 ++-- + arch/x86/kvm/svm/svm.c | 41 +++++++++++++++-------------------------- + arch/x86/kvm/svm/svm.h | 2 +- + 3 files changed, 18 insertions(+), 29 deletions(-) + +diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c +index c9c9bd453a97..efaaef2b7ae1 100644 +--- a/arch/x86/kvm/svm/sev.c ++++ b/arch/x86/kvm/svm/sev.c +@@ -196,7 +196,7 @@ static void sev_asid_free(struct kvm_sev_info *sev) + __set_bit(sev->asid, sev_reclaim_asid_bitmap); + + for_each_possible_cpu(cpu) { +- sd = per_cpu(svm_data, cpu); ++ sd = per_cpu_ptr(&svm_data, cpu); + sd->sev_vmcbs[sev->asid] = NULL; + } + +@@ -2600,7 +2600,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm) + + void pre_sev_run(struct vcpu_svm *svm, int cpu) + { +- struct svm_cpu_data *sd = per_cpu(svm_data, cpu); ++ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); + int asid = sev_get_asid(svm->vcpu.kvm); + + /* Assign the asid allocated with this SEV guest */ +diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c +index ecf4d8233e49..6b2f332f5d54 100644 +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -245,7 +245,7 @@ struct kvm_ldttss_desc { + u32 zero1; + } __attribute__((packed)); + +-DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); ++DEFINE_PER_CPU(struct svm_cpu_data, svm_data); + + /* + * Only MSR_TSC_AUX is switched via the user return hook. EFER is switched via +@@ -583,12 +583,7 @@ static int svm_hardware_enable(void) + pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me); + return -EINVAL; + } +- sd = per_cpu(svm_data, me); +- if (!sd) { +- pr_err("%s: svm_data is NULL on %d\n", __func__, me); +- return -EINVAL; +- } +- ++ sd = per_cpu_ptr(&svm_data, me); + sd->asid_generation = 1; + sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; + sd->next_asid = sd->max_asid + 1; +@@ -648,41 +643,35 @@ static int svm_hardware_enable(void) + + static void svm_cpu_uninit(int cpu) + { +- struct svm_cpu_data *sd = per_cpu(svm_data, cpu); ++ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); + +- if (!sd) ++ if (!sd->save_area) + return; + +- per_cpu(svm_data, cpu) = NULL; + kfree(sd->sev_vmcbs); + __free_page(sd->save_area); +- kfree(sd); ++ sd->save_area = NULL; + } + + static int svm_cpu_init(int cpu) + { +- struct svm_cpu_data *sd; ++ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); + int ret = -ENOMEM; + +- sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL); +- if (!sd) +- return ret; ++ memset(sd, 0, sizeof(struct svm_cpu_data)); + sd->save_area = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!sd->save_area) +- goto free_cpu_data; ++ return ret; + + ret = sev_cpu_init(sd); + if (ret) + goto free_save_area; + +- per_cpu(svm_data, cpu) = sd; +- + return 0; + + free_save_area: + __free_page(sd->save_area); +-free_cpu_data: +- kfree(sd); ++ sd->save_area = NULL; + return ret; + + } +@@ -1426,7 +1415,7 @@ static void svm_clear_current_vmcb(struct vmcb *vmcb) + int i; + + for_each_online_cpu(i) +- cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL); ++ cmpxchg(per_cpu_ptr(&svm_data.current_vmcb, i), vmcb, NULL); + } + + static void svm_vcpu_free(struct kvm_vcpu *vcpu) +@@ -1451,7 +1440,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu) + static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) + { + struct vcpu_svm *svm = to_svm(vcpu); +- struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); ++ 
struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); + + if (sev_es_guest(vcpu->kvm)) + sev_es_unmap_ghcb(svm); +@@ -1488,7 +1477,7 @@ static void svm_prepare_host_switch(struct kvm_vcpu *vcpu) + static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) + { + struct vcpu_svm *svm = to_svm(vcpu); +- struct svm_cpu_data *sd = per_cpu(svm_data, cpu); ++ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); + + if (sd->current_vmcb != svm->vmcb) { + sd->current_vmcb = svm->vmcb; +@@ -3443,7 +3432,7 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) + + static void reload_tss(struct kvm_vcpu *vcpu) + { +- struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); ++ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); + + sd->tss_desc->type = 9; /* available 32/64-bit TSS */ + load_TR_desc(); +@@ -3451,7 +3440,7 @@ static void reload_tss(struct kvm_vcpu *vcpu) + + static void pre_svm_run(struct kvm_vcpu *vcpu) + { +- struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); ++ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); + struct vcpu_svm *svm = to_svm(vcpu); + + /* +@@ -3920,7 +3909,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) + if (sev_es_guest(vcpu->kvm)) { + __svm_sev_es_vcpu_run(svm); + } else { +- struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); ++ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); + + __svm_vcpu_run(svm); + vmload(__sme_page_pa(sd->save_area)); +diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h +index 8a8894d948a0..f1483209e186 100644 +--- a/arch/x86/kvm/svm/svm.h ++++ b/arch/x86/kvm/svm/svm.h +@@ -294,7 +294,7 @@ struct svm_cpu_data { + struct vmcb **sev_vmcbs; + }; + +-DECLARE_PER_CPU(struct svm_cpu_data *, svm_data); ++DECLARE_PER_CPU(struct svm_cpu_data, svm_data); + + void recalc_intercepts(struct vcpu_svm *svm); + +-- +2.35.1 + diff --git a/queue-6.0/kvm-svm-move-msr_ia32_spec_ctrl-save-restore-to-asse.patch b/queue-6.0/kvm-svm-move-msr_ia32_spec_ctrl-save-restore-to-asse.patch new file mode 100644 index 00000000000..2b71f62a18e --- /dev/null +++ b/queue-6.0/kvm-svm-move-msr_ia32_spec_ctrl-save-restore-to-asse.patch @@ -0,0 +1,385 @@ +From 580156a1db7717586424b4c61efdf5480ce82804 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 30 Sep 2022 14:24:40 -0400 +Subject: KVM: SVM: move MSR_IA32_SPEC_CTRL save/restore to assembly + +From: Paolo Bonzini + +[ Upstream commit 9f2febf3f04daebdaaa5a43cfa20e3844905c0f9 ] + +Restoration of the host IA32_SPEC_CTRL value is probably too late +with respect to the return thunk training sequence. + +With respect to the user/kernel boundary, AMD says, "If software chooses +to toggle STIBP (e.g., set STIBP on kernel entry, and clear it on kernel +exit), software should set STIBP to 1 before executing the return thunk +training sequence." I assume the same requirements apply to the guest/host +boundary. The return thunk training sequence is in vmenter.S, quite close +to the VM-exit. On hosts without V_SPEC_CTRL, however, the host's +IA32_SPEC_CTRL value is not restored until much later. + +To avoid this, move the restoration of host SPEC_CTRL to assembly and, +for consistency, move the restoration of the guest SPEC_CTRL as well. +This is not particularly difficult, apart from some care to cover both +32- and 64-bit, and to share code between SEV-ES and normal vmentry. 
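+
+On hosts without V_SPEC_CTRL, the ordering that the assembly now
+enforces is roughly the following (a C-like sketch of what
+RESTORE_HOST_SPEC_CTRL does; the real code in vmenter.S is assembly):
+
+    /* still in vmenter.S, before UNTRAIN_RET and before any RET */
+    if (!spec_ctrl_intercepted)
+        svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
+    if (svm->spec_ctrl != this_cpu_read(x86_spec_ctrl_current))
+        wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(x86_spec_ctrl_current));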
+ +Cc: stable@vger.kernel.org +Fixes: a149180fbcf3 ("x86: Add magic AMD return-thunk") +Suggested-by: Jim Mattson +Reviewed-by: Sean Christopherson +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + arch/x86/kernel/cpu/bugs.c | 13 +--- + arch/x86/kvm/kvm-asm-offsets.c | 1 + + arch/x86/kvm/svm/svm.c | 37 ++++------ + arch/x86/kvm/svm/svm.h | 4 +- + arch/x86/kvm/svm/vmenter.S | 119 ++++++++++++++++++++++++++++++++- + 5 files changed, 136 insertions(+), 38 deletions(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index da7c361f47e0..6ec0b7ce7453 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -196,22 +196,15 @@ void __init check_bugs(void) + } + + /* +- * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is +- * done in vmenter.S. ++ * NOTE: This function is *only* called for SVM, since Intel uses ++ * MSR_IA32_SPEC_CTRL for SSBD. + */ + void + x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) + { +- u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current(); ++ u64 guestval, hostval; + struct thread_info *ti = current_thread_info(); + +- if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { +- if (hostval != guestval) { +- msrval = setguest ? guestval : hostval; +- wrmsrl(MSR_IA32_SPEC_CTRL, msrval); +- } +- } +- + /* + * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update + * MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported. +diff --git a/arch/x86/kvm/kvm-asm-offsets.c b/arch/x86/kvm/kvm-asm-offsets.c +index 1b805cd24d66..24a710d37323 100644 +--- a/arch/x86/kvm/kvm-asm-offsets.c ++++ b/arch/x86/kvm/kvm-asm-offsets.c +@@ -16,6 +16,7 @@ static void __used common(void) + BLANK(); + OFFSET(SVM_vcpu_arch_regs, vcpu_svm, vcpu.arch.regs); + OFFSET(SVM_current_vmcb, vcpu_svm, current_vmcb); ++ OFFSET(SVM_spec_ctrl, vcpu_svm, spec_ctrl); + OFFSET(SVM_vmcb01, vcpu_svm, vmcb01); + OFFSET(KVM_VMCB_pa, kvm_vmcb_info, pa); + OFFSET(SD_save_area_pa, svm_cpu_data, save_area_pa); +diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c +index c14fabd662f6..e80756ab141b 100644 +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -722,6 +722,15 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) + u32 offset; + u32 *msrpm; + ++ /* ++ * For non-nested case: ++ * If the L01 MSR bitmap does not intercept the MSR, then we need to ++ * save it. ++ * ++ * For nested case: ++ * If the L02 MSR bitmap does not intercept the MSR, then we need to ++ * save it. ++ */ + msrpm = is_guest_mode(vcpu) ? 
to_svm(vcpu)->nested.msrpm: + to_svm(vcpu)->msrpm; + +@@ -3902,16 +3911,16 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) + return EXIT_FASTPATH_NONE; + } + +-static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) ++static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted) + { + struct vcpu_svm *svm = to_svm(vcpu); + + guest_state_enter_irqoff(); + + if (sev_es_guest(vcpu->kvm)) +- __svm_sev_es_vcpu_run(svm); ++ __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted); + else +- __svm_vcpu_run(svm); ++ __svm_vcpu_run(svm, spec_ctrl_intercepted); + + guest_state_exit_irqoff(); + } +@@ -3919,6 +3928,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) + static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) + { + struct vcpu_svm *svm = to_svm(vcpu); ++ bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL); + + trace_kvm_entry(vcpu); + +@@ -3977,26 +3987,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) + if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL)) + x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); + +- svm_vcpu_enter_exit(vcpu); +- +- /* +- * We do not use IBRS in the kernel. If this vCPU has used the +- * SPEC_CTRL MSR it may have left it on; save the value and +- * turn it off. This is much more efficient than blindly adding +- * it to the atomic save/restore list. Especially as the former +- * (Saving guest MSRs on vmexit) doesn't even exist in KVM. +- * +- * For non-nested case: +- * If the L01 MSR bitmap does not intercept the MSR, then we need to +- * save it. +- * +- * For nested case: +- * If the L02 MSR bitmap does not intercept the MSR, then we need to +- * save it. +- */ +- if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL) && +- unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) +- svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); ++ svm_vcpu_enter_exit(vcpu, spec_ctrl_intercepted); + + if (!sev_es_guest(vcpu->kvm)) + reload_tss(vcpu); +diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h +index 8744f3b1d217..ea3049b978ea 100644 +--- a/arch/x86/kvm/svm/svm.h ++++ b/arch/x86/kvm/svm/svm.h +@@ -683,7 +683,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm); + + /* vmenter.S */ + +-void __svm_sev_es_vcpu_run(struct vcpu_svm *svm); +-void __svm_vcpu_run(struct vcpu_svm *svm); ++void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); ++void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); + + #endif +diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S +index 57440acfc73e..34367dc203f2 100644 +--- a/arch/x86/kvm/svm/vmenter.S ++++ b/arch/x86/kvm/svm/vmenter.S +@@ -32,9 +32,69 @@ + + .section .noinstr.text, "ax" + ++.macro RESTORE_GUEST_SPEC_CTRL ++ /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */ ++ ALTERNATIVE_2 "", \ ++ "jmp 800f", X86_FEATURE_MSR_SPEC_CTRL, \ ++ "", X86_FEATURE_V_SPEC_CTRL ++801: ++.endm ++.macro RESTORE_GUEST_SPEC_CTRL_BODY ++800: ++ /* ++ * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the ++ * host's, write the MSR. This is kept out-of-line so that the common ++ * case does not have to jump. ++ * ++ * IMPORTANT: To avoid RSB underflow attacks and any other nastiness, ++ * there must not be any returns or indirect branches between this code ++ * and vmentry. 
++ */ ++ movl SVM_spec_ctrl(%_ASM_DI), %eax ++ cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax ++ je 801b ++ mov $MSR_IA32_SPEC_CTRL, %ecx ++ xor %edx, %edx ++ wrmsr ++ jmp 801b ++.endm ++ ++.macro RESTORE_HOST_SPEC_CTRL ++ /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */ ++ ALTERNATIVE_2 "", \ ++ "jmp 900f", X86_FEATURE_MSR_SPEC_CTRL, \ ++ "", X86_FEATURE_V_SPEC_CTRL ++901: ++.endm ++.macro RESTORE_HOST_SPEC_CTRL_BODY ++900: ++ /* Same for after vmexit. */ ++ mov $MSR_IA32_SPEC_CTRL, %ecx ++ ++ /* ++ * Load the value that the guest had written into MSR_IA32_SPEC_CTRL, ++ * if it was not intercepted during guest execution. ++ */ ++ cmpb $0, (%_ASM_SP) ++ jnz 998f ++ rdmsr ++ movl %eax, SVM_spec_ctrl(%_ASM_DI) ++998: ++ ++ /* Now restore the host value of the MSR if different from the guest's. */ ++ movl PER_CPU_VAR(x86_spec_ctrl_current), %eax ++ cmp SVM_spec_ctrl(%_ASM_DI), %eax ++ je 901b ++ xor %edx, %edx ++ wrmsr ++ jmp 901b ++.endm ++ ++ + /** + * __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode + * @svm: struct vcpu_svm * ++ * @spec_ctrl_intercepted: bool + */ + SYM_FUNC_START(__svm_vcpu_run) + push %_ASM_BP +@@ -54,17 +114,26 @@ SYM_FUNC_START(__svm_vcpu_run) + * order compared to when they are needed. + */ + ++ /* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */ ++ push %_ASM_ARG2 ++ + /* Needed to restore access to percpu variables. */ + __ASM_SIZE(push) PER_CPU_VAR(svm_data + SD_save_area_pa) + +- /* Save @svm. */ ++ /* Finally save @svm. */ + push %_ASM_ARG1 + + .ifnc _ASM_ARG1, _ASM_DI +- /* Move @svm to RDI. */ ++ /* ++ * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX ++ * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL. ++ */ + mov %_ASM_ARG1, %_ASM_DI + .endif + ++ /* Clobbers RAX, RCX, RDX. */ ++ RESTORE_GUEST_SPEC_CTRL ++ + /* + * Use a single vmcb (vmcb01 because it's always valid) for + * context switching guest state via VMLOAD/VMSAVE, that way +@@ -142,6 +211,9 @@ SYM_FUNC_START(__svm_vcpu_run) + FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE + #endif + ++ /* Clobbers RAX, RCX, RDX. */ ++ RESTORE_HOST_SPEC_CTRL ++ + /* + * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be + * untrained as soon as we exit the VM and are back to the +@@ -177,6 +249,9 @@ SYM_FUNC_START(__svm_vcpu_run) + xor %r15d, %r15d + #endif + ++ /* "Pop" @spec_ctrl_intercepted. */ ++ pop %_ASM_BX ++ + pop %_ASM_BX + + #ifdef CONFIG_X86_64 +@@ -191,6 +266,9 @@ SYM_FUNC_START(__svm_vcpu_run) + pop %_ASM_BP + RET + ++ RESTORE_GUEST_SPEC_CTRL_BODY ++ RESTORE_HOST_SPEC_CTRL_BODY ++ + 10: cmpb $0, kvm_rebooting + jne 2b + ud2 +@@ -214,6 +292,7 @@ SYM_FUNC_END(__svm_vcpu_run) + /** + * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode + * @svm: struct vcpu_svm * ++ * @spec_ctrl_intercepted: bool + */ + SYM_FUNC_START(__svm_sev_es_vcpu_run) + push %_ASM_BP +@@ -228,8 +307,30 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) + #endif + push %_ASM_BX + ++ /* ++ * Save variables needed after vmexit on the stack, in inverse ++ * order compared to when they are needed. ++ */ ++ ++ /* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */ ++ push %_ASM_ARG2 ++ ++ /* Save @svm. */ ++ push %_ASM_ARG1 ++ ++.ifnc _ASM_ARG1, _ASM_DI ++ /* ++ * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX ++ * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL. ++ */ ++ mov %_ASM_ARG1, %_ASM_DI ++.endif ++ ++ /* Clobbers RAX, RCX, RDX. 
*/ ++ RESTORE_GUEST_SPEC_CTRL ++ + /* Get svm->current_vmcb->pa into RAX. */ +- mov SVM_current_vmcb(%_ASM_ARG1), %_ASM_AX ++ mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX + mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX + + /* Enter guest mode */ +@@ -239,11 +340,17 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) + + 2: cli + ++ /* Pop @svm to RDI, guest registers have been saved already. */ ++ pop %_ASM_DI ++ + #ifdef CONFIG_RETPOLINE + /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ + FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE + #endif + ++ /* Clobbers RAX, RCX, RDX. */ ++ RESTORE_HOST_SPEC_CTRL ++ + /* + * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be + * untrained as soon as we exit the VM and are back to the +@@ -253,6 +360,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) + */ + UNTRAIN_RET + ++ /* "Pop" @spec_ctrl_intercepted. */ ++ pop %_ASM_BX ++ + pop %_ASM_BX + + #ifdef CONFIG_X86_64 +@@ -267,6 +377,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) + pop %_ASM_BP + RET + ++ RESTORE_GUEST_SPEC_CTRL_BODY ++ RESTORE_HOST_SPEC_CTRL_BODY ++ + 3: cmpb $0, kvm_rebooting + jne 2b + ud2 +-- +2.35.1 + diff --git a/queue-6.0/kvm-svm-remove-dead-field-from-struct-svm_cpu_data.patch b/queue-6.0/kvm-svm-remove-dead-field-from-struct-svm_cpu_data.patch new file mode 100644 index 00000000000..a01512d1e41 --- /dev/null +++ b/queue-6.0/kvm-svm-remove-dead-field-from-struct-svm_cpu_data.patch @@ -0,0 +1,50 @@ +From 3e66c3581a7ce63d69b5f1b5f05845badaf2b3dc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Nov 2022 08:54:20 -0500 +Subject: KVM: SVM: remove dead field from struct svm_cpu_data + +From: Paolo Bonzini + +[ Upstream commit 181d0fb0bb023e8996b1cf7970e3708d72442b0b ] + +The "cpu" field of struct svm_cpu_data has been write-only since commit +4b656b120249 ("KVM: SVM: force new asid on vcpu migration", 2009-08-05). +Remove it. 
+ +Reviewed-by: Sean Christopherson +Signed-off-by: Paolo Bonzini +Stable-dep-of: e287bd005ad9 ("KVM: SVM: restore host save area from assembly") +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/svm/svm.c | 1 - + arch/x86/kvm/svm/svm.h | 2 -- + 2 files changed, 3 deletions(-) + +diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c +index 454746641a48..ecf4d8233e49 100644 +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -667,7 +667,6 @@ static int svm_cpu_init(int cpu) + sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL); + if (!sd) + return ret; +- sd->cpu = cpu; + sd->save_area = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!sd->save_area) + goto free_cpu_data; +diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h +index 7ff1879e73c5..8a8894d948a0 100644 +--- a/arch/x86/kvm/svm/svm.h ++++ b/arch/x86/kvm/svm/svm.h +@@ -281,8 +281,6 @@ struct vcpu_svm { + }; + + struct svm_cpu_data { +- int cpu; +- + u64 asid_generation; + u32 max_asid; + u32 next_asid; +-- +2.35.1 + diff --git a/queue-6.0/kvm-svm-restore-host-save-area-from-assembly.patch b/queue-6.0/kvm-svm-restore-host-save-area-from-assembly.patch new file mode 100644 index 00000000000..1da3b8c9eb2 --- /dev/null +++ b/queue-6.0/kvm-svm-restore-host-save-area-from-assembly.patch @@ -0,0 +1,175 @@ +From 82432f01ad47e55ca786f40c6adc09eb533c93e4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 7 Nov 2022 03:49:59 -0500 +Subject: KVM: SVM: restore host save area from assembly + +From: Paolo Bonzini + +[ Upstream commit e287bd005ad9d85dd6271dd795d3ecfb6bca46ad ] + +Allow access to the percpu area via the GS segment base, which is +needed in order to access the saved host spec_ctrl value. In linux-next +FILL_RETURN_BUFFER also needs to access percpu data. + +For simplicity, the physical address of the save area is added to struct +svm_cpu_data. 
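+
+The resulting flow is roughly (a sketch; the assembly half is the new
+vmenter.S code added below):
+
+    /* svm_cpu_init(): cache the SME-aware physical address once */
+    sd->save_area_pa = __sme_page_pa(sd->save_area);
+
+    /* __svm_vcpu_run(), shortly after VM-exit: the PA was pushed earlier */
+    pop %_ASM_AX
+    vmload %_ASM_AX    /* restores GS base, percpu data reachable again */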
+ +Cc: stable@vger.kernel.org +Fixes: a149180fbcf3 ("x86: Add magic AMD return-thunk") +Reported-by: Nathan Chancellor +Analyzed-by: Andrew Cooper +Tested-by: Nathan Chancellor +Reviewed-by: Sean Christopherson +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/kvm-asm-offsets.c | 1 + + arch/x86/kvm/svm/svm.c | 14 ++++++-------- + arch/x86/kvm/svm/svm.h | 2 ++ + arch/x86/kvm/svm/svm_ops.h | 5 ----- + arch/x86/kvm/svm/vmenter.S | 17 +++++++++++++++++ + 5 files changed, 26 insertions(+), 13 deletions(-) + +diff --git a/arch/x86/kvm/kvm-asm-offsets.c b/arch/x86/kvm/kvm-asm-offsets.c +index f83e88b85bf2..1b805cd24d66 100644 +--- a/arch/x86/kvm/kvm-asm-offsets.c ++++ b/arch/x86/kvm/kvm-asm-offsets.c +@@ -18,6 +18,7 @@ static void __used common(void) + OFFSET(SVM_current_vmcb, vcpu_svm, current_vmcb); + OFFSET(SVM_vmcb01, vcpu_svm, vmcb01); + OFFSET(KVM_VMCB_pa, kvm_vmcb_info, pa); ++ OFFSET(SD_save_area_pa, svm_cpu_data, save_area_pa); + } + + if (IS_ENABLED(CONFIG_KVM_INTEL)) { +diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c +index 6b2f332f5d54..c14fabd662f6 100644 +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -594,7 +594,7 @@ static int svm_hardware_enable(void) + + wrmsrl(MSR_EFER, efer | EFER_SVME); + +- wrmsrl(MSR_VM_HSAVE_PA, __sme_page_pa(sd->save_area)); ++ wrmsrl(MSR_VM_HSAVE_PA, sd->save_area_pa); + + if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { + /* +@@ -650,6 +650,7 @@ static void svm_cpu_uninit(int cpu) + + kfree(sd->sev_vmcbs); + __free_page(sd->save_area); ++ sd->save_area_pa = 0; + sd->save_area = NULL; + } + +@@ -667,6 +668,7 @@ static int svm_cpu_init(int cpu) + if (ret) + goto free_save_area; + ++ sd->save_area_pa = __sme_page_pa(sd->save_area); + return 0; + + free_save_area: +@@ -1452,7 +1454,7 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) + * Save additional host state that will be restored on VMEXIT (sev-es) + * or subsequent vmload of host save area. 
+ */ +- vmsave(__sme_page_pa(sd->save_area)); ++ vmsave(sd->save_area_pa); + if (sev_es_guest(vcpu->kvm)) { + struct sev_es_save_area *hostsa; + hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400); +@@ -3906,14 +3908,10 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) + + guest_state_enter_irqoff(); + +- if (sev_es_guest(vcpu->kvm)) { ++ if (sev_es_guest(vcpu->kvm)) + __svm_sev_es_vcpu_run(svm); +- } else { +- struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); +- ++ else + __svm_vcpu_run(svm); +- vmload(__sme_page_pa(sd->save_area)); +- } + + guest_state_exit_irqoff(); + } +diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h +index f1483209e186..8744f3b1d217 100644 +--- a/arch/x86/kvm/svm/svm.h ++++ b/arch/x86/kvm/svm/svm.h +@@ -288,6 +288,8 @@ struct svm_cpu_data { + struct kvm_ldttss_desc *tss_desc; + + struct page *save_area; ++ unsigned long save_area_pa; ++ + struct vmcb *current_vmcb; + + /* index = sev_asid, value = vmcb pointer */ +diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h +index 9430d6437c9f..36c8af87a707 100644 +--- a/arch/x86/kvm/svm/svm_ops.h ++++ b/arch/x86/kvm/svm/svm_ops.h +@@ -61,9 +61,4 @@ static __always_inline void vmsave(unsigned long pa) + svm_asm1(vmsave, "a" (pa), "memory"); + } + +-static __always_inline void vmload(unsigned long pa) +-{ +- svm_asm1(vmload, "a" (pa), "memory"); +-} +- + #endif /* __KVM_X86_SVM_OPS_H */ +diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S +index 5bc2ed7d79c0..57440acfc73e 100644 +--- a/arch/x86/kvm/svm/vmenter.S ++++ b/arch/x86/kvm/svm/vmenter.S +@@ -49,6 +49,14 @@ SYM_FUNC_START(__svm_vcpu_run) + #endif + push %_ASM_BX + ++ /* ++ * Save variables needed after vmexit on the stack, in inverse ++ * order compared to when they are needed. ++ */ ++ ++ /* Needed to restore access to percpu variables. */ ++ __ASM_SIZE(push) PER_CPU_VAR(svm_data + SD_save_area_pa) ++ + /* Save @svm. */ + push %_ASM_ARG1 + +@@ -124,6 +132,11 @@ SYM_FUNC_START(__svm_vcpu_run) + 5: vmsave %_ASM_AX + 6: + ++ /* Restores GSBASE among other things, allowing access to percpu data. */ ++ pop %_ASM_AX ++7: vmload %_ASM_AX ++8: ++ + #ifdef CONFIG_RETPOLINE + /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ + FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE +@@ -187,10 +200,14 @@ SYM_FUNC_START(__svm_vcpu_run) + 50: cmpb $0, kvm_rebooting + jne 6b + ud2 ++70: cmpb $0, kvm_rebooting ++ jne 8b ++ ud2 + + _ASM_EXTABLE(1b, 10b) + _ASM_EXTABLE(3b, 30b) + _ASM_EXTABLE(5b, 50b) ++ _ASM_EXTABLE(7b, 70b) + + SYM_FUNC_END(__svm_vcpu_run) + +-- +2.35.1 + diff --git a/queue-6.0/series b/queue-6.0/series index 8a9e91467a2..126a04d9798 100644 --- a/queue-6.0/series +++ b/queue-6.0/series @@ -60,3 +60,11 @@ x86-cpu-add-several-intel-server-cpu-model-numbers.patch tools-testing-cxl-fix-some-error-exits.patch cifs-always-iterate-smb-sessions-using-primary-chann.patch asoc-codecs-jz4725b-fix-spelling-mistake-sourc-sourc.patch +arm64-mm-fold-check-for-kfence-into-can_set_direct_m.patch +arm64-fix-rodata-full-again.patch +hugetlb-rename-remove_huge_page-to-hugetlb_delete_fr.patch +hugetlbfs-don-t-delete-error-page-from-pagecache.patch +kvm-svm-remove-dead-field-from-struct-svm_cpu_data.patch +kvm-svm-do-not-allocate-struct-svm_cpu_data-dynamica.patch +kvm-svm-restore-host-save-area-from-assembly.patch +kvm-svm-move-msr_ia32_spec_ctrl-save-restore-to-asse.patch