From 0c831e39d4cbdb554e6af87dfe12f8057c283c01 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 30 Nov 2022 13:32:33 +0100 Subject: [PATCH] 6.0-stable patches added patches: loongarch-clear-fpu-simd-thread-info-flags-for-kernel-thread.patch loongarch-set-_page_dirty-only-if-_page_write-is-set-in-pmd-pte-_mkdirty.patch mm-correctly-charge-compressed-memory-to-its-memcg.patch mm-fix-unexpected-changes-to-failslab-fail_page_alloc-.attr.patch x86-ioremap-fix-page-aligned-size-calculation-in-__ioremap_caller.patch x86-pm-add-enumeration-check-before-spec-msrs-save-restore-setup.patch x86-tsx-add-a-feature-bit-for-tsx-control-msr-support.patch --- ...-thread-info-flags-for-kernel-thread.patch | 97 ++++++++++ ...age_write-is-set-in-pmd-pte-_mkdirty.patch | 55 ++++++ ...harge-compressed-memory-to-its-memcg.patch | 45 +++++ ...es-to-failslab-fail_page_alloc-.attr.patch | 165 ++++++++++++++++++ queue-6.0/series | 7 + ...size-calculation-in-__ioremap_caller.patch | 49 ++++++ ...-before-spec-msrs-save-restore-setup.patch | 98 +++++++++++ ...ture-bit-for-tsx-control-msr-support.patch | 115 ++++++++++++ 8 files changed, 631 insertions(+) create mode 100644 queue-6.0/loongarch-clear-fpu-simd-thread-info-flags-for-kernel-thread.patch create mode 100644 queue-6.0/loongarch-set-_page_dirty-only-if-_page_write-is-set-in-pmd-pte-_mkdirty.patch create mode 100644 queue-6.0/mm-correctly-charge-compressed-memory-to-its-memcg.patch create mode 100644 queue-6.0/mm-fix-unexpected-changes-to-failslab-fail_page_alloc-.attr.patch create mode 100644 queue-6.0/x86-ioremap-fix-page-aligned-size-calculation-in-__ioremap_caller.patch create mode 100644 queue-6.0/x86-pm-add-enumeration-check-before-spec-msrs-save-restore-setup.patch create mode 100644 queue-6.0/x86-tsx-add-a-feature-bit-for-tsx-control-msr-support.patch diff --git a/queue-6.0/loongarch-clear-fpu-simd-thread-info-flags-for-kernel-thread.patch b/queue-6.0/loongarch-clear-fpu-simd-thread-info-flags-for-kernel-thread.patch 
new file mode 100644 index 00000000000..4fa8cf192cd --- /dev/null +++ b/queue-6.0/loongarch-clear-fpu-simd-thread-info-flags-for-kernel-thread.patch @@ -0,0 +1,97 @@ +From e428e9613531d1ef6bd0d91352899712b29134fb Mon Sep 17 00:00:00 2001 +From: Huacai Chen +Date: Mon, 21 Nov 2022 19:02:57 +0800 +Subject: LoongArch: Clear FPU/SIMD thread info flags for kernel thread + +From: Huacai Chen + +commit e428e9613531d1ef6bd0d91352899712b29134fb upstream. + +If a kernel thread is created by a user thread, it may carry FPU/SIMD +thread info flags (TIF_USEDFPU, TIF_USEDSIMD, etc.). Then it will be +considered as a fpu owner and kernel try to save its FPU/SIMD context +and cause such errors: + +[ 41.518931] do_fpu invoked from kernel context![#1]: +[ 41.523933] CPU: 1 PID: 395 Comm: iou-wrk-394 Not tainted 6.1.0-rc5+ #217 +[ 41.530757] Hardware name: Loongson Loongson-3A5000-7A1000-1w-CRB/Loongson-LS3A5000-7A1000-1w-CRB, BIOS vUDK2018-LoongArch-V2.0.pre-beta8 08/18/2022 +[ 41.544064] $ 0 : 0000000000000000 90000000011e9468 9000000106c7c000 9000000106c7fcf0 +[ 41.552101] $ 4 : 9000000106305d40 9000000106689800 9000000106c7fd08 0000000003995818 +[ 41.560138] $ 8 : 0000000000000001 90000000009a72e4 0000000000000020 fffffffffffffffc +[ 41.568174] $12 : 0000000000000000 0000000000000000 0000000000000020 00000009aab7e130 +[ 41.576211] $16 : 00000000000001ff 0000000000000407 0000000000000001 0000000000000000 +[ 41.584247] $20 : 0000000000000000 0000000000000001 9000000106c7fd70 90000001002f0400 +[ 41.592284] $24 : 0000000000000000 900000000178f740 90000000011e9834 90000001063057c0 +[ 41.600320] $28 : 0000000000000000 0000000000000001 9000000006826b40 9000000106305140 +[ 41.608356] era : 9000000000228848 _save_fp+0x0/0xd8 +[ 41.613542] ra : 90000000011e9468 __schedule+0x568/0x8d0 +[ 41.619160] CSR crmd: 000000b0 +[ 41.619163] CSR prmd: 00000000 +[ 41.622359] CSR euen: 00000000 +[ 41.625558] CSR ecfg: 00071c1c +[ 41.628756] CSR estat: 000f0000 +[ 41.635239] ExcCode : f (SubCode 0) +[ 
41.638783] PrId : 0014c010 (Loongson-64bit) +[ 41.643191] Modules linked in: acpi_ipmi vfat fat ipmi_si ipmi_devintf cfg80211 ipmi_msghandler rfkill fuse efivarfs +[ 41.653734] Process iou-wrk-394 (pid: 395, threadinfo=0000000004ebe913, task=00000000636fa1be) +[ 41.662375] Stack : 00000000ffff0875 9000000006800ec0 9000000006800ec0 90000000002d57e0 +[ 41.670412] 0000000000000001 0000000000000000 9000000106535880 0000000000000001 +[ 41.678450] 9000000105291800 0000000000000000 9000000105291838 900000000178e000 +[ 41.686487] 9000000106c7fd90 9000000106305140 0000000000000001 90000000011e9834 +[ 41.694523] 00000000ffff0875 90000000011f034c 9000000105291838 9000000105291830 +[ 41.702561] 0000000000000000 9000000006801440 00000000ffff0875 90000000002d48c0 +[ 41.710597] 9000000128800001 9000000106305140 9000000105291838 9000000105291838 +[ 41.718634] 9000000105291830 9000000107811740 9000000105291848 90000000009bf1e0 +[ 41.726672] 9000000105291830 9000000107811748 2d6b72772d756f69 0000000000343933 +[ 41.734708] 0000000000000000 0000000000000000 0000000000000000 0000000000000000 +[ 41.742745] ... +[ 41.745252] Call Trace: +[ 42.197868] [<9000000000228848>] _save_fp+0x0/0xd8 +[ 42.205214] [<90000000011ed468>] __schedule+0x568/0x8d0 +[ 42.210485] [<90000000011ed834>] schedule+0x64/0xd4 +[ 42.215411] [<90000000011f434c>] schedule_timeout+0x88/0x188 +[ 42.221115] [<90000000009c36d0>] io_wqe_worker+0x184/0x350 +[ 42.226645] [<9000000000221cf0>] ret_from_kernel_thread+0xc/0x9c + +This can be easily triggered by ltp testcase syscalls/io_uring02 and it +can also be easily fixed by clearing the FPU/SIMD thread info flags for +kernel threads in copy_thread(). 
+ +Cc: stable@vger.kernel.org +Reported-by: Qi Hu +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/kernel/process.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/arch/loongarch/kernel/process.c ++++ b/arch/loongarch/kernel/process.c +@@ -152,7 +152,7 @@ int copy_thread(struct task_struct *p, c + childregs->csr_crmd = p->thread.csr_crmd; + childregs->csr_prmd = p->thread.csr_prmd; + childregs->csr_ecfg = p->thread.csr_ecfg; +- return 0; ++ goto out; + } + + /* user thread */ +@@ -171,14 +171,15 @@ int copy_thread(struct task_struct *p, c + */ + childregs->csr_euen = 0; + ++ if (clone_flags & CLONE_SETTLS) ++ childregs->regs[2] = tls; ++ ++out: + clear_tsk_thread_flag(p, TIF_USEDFPU); + clear_tsk_thread_flag(p, TIF_USEDSIMD); + clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE); + clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE); + +- if (clone_flags & CLONE_SETTLS) +- childregs->regs[2] = tls; +- + return 0; + } + diff --git a/queue-6.0/loongarch-set-_page_dirty-only-if-_page_write-is-set-in-pmd-pte-_mkdirty.patch b/queue-6.0/loongarch-set-_page_dirty-only-if-_page_write-is-set-in-pmd-pte-_mkdirty.patch new file mode 100644 index 00000000000..236e02d7f2d --- /dev/null +++ b/queue-6.0/loongarch-set-_page_dirty-only-if-_page_write-is-set-in-pmd-pte-_mkdirty.patch @@ -0,0 +1,55 @@ +From bf2f34a506e66e2979de6b17c337c5d4b25b4d2c Mon Sep 17 00:00:00 2001 +From: Huacai Chen +Date: Mon, 21 Nov 2022 19:02:57 +0800 +Subject: LoongArch: Set _PAGE_DIRTY only if _PAGE_WRITE is set in {pmd,pte}_mkdirty() + +From: Huacai Chen + +commit bf2f34a506e66e2979de6b17c337c5d4b25b4d2c upstream. + +Now {pmd,pte}_mkdirty() set _PAGE_DIRTY bit unconditionally, this causes +random segmentation fault after commit 0ccf7f168e17bb7e ("mm/thp: carry +over dirty bit when thp splits on pmd"). 
+ +The reason is: when fork(), parent process use pmd_wrprotect() to clear +huge page's _PAGE_WRITE and _PAGE_DIRTY (for COW); then pte_mkdirty() set +_PAGE_DIRTY as well as _PAGE_MODIFIED while splitting dirty huge pages; +once _PAGE_DIRTY is set, there will be no tlb modify exception so the COW +mechanism fails; and at last memory corruption occurred between parent +and child processes. + +So, we should set _PAGE_DIRTY only when _PAGE_WRITE is set in {pmd,pte}_ +mkdirty(). + +Cc: stable@vger.kernel.org +Cc: Peter Xu +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/include/asm/pgtable.h | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/arch/loongarch/include/asm/pgtable.h ++++ b/arch/loongarch/include/asm/pgtable.h +@@ -349,7 +349,9 @@ static inline pte_t pte_mkclean(pte_t pt + + static inline pte_t pte_mkdirty(pte_t pte) + { +- pte_val(pte) |= (_PAGE_DIRTY | _PAGE_MODIFIED); ++ pte_val(pte) |= _PAGE_MODIFIED; ++ if (pte_val(pte) & _PAGE_WRITE) ++ pte_val(pte) |= _PAGE_DIRTY; + return pte; + } + +@@ -475,7 +477,9 @@ static inline pmd_t pmd_mkclean(pmd_t pm + + static inline pmd_t pmd_mkdirty(pmd_t pmd) + { +- pmd_val(pmd) |= (_PAGE_DIRTY | _PAGE_MODIFIED); ++ pmd_val(pmd) |= _PAGE_MODIFIED; ++ if (pmd_val(pmd) & _PAGE_WRITE) ++ pmd_val(pmd) |= _PAGE_DIRTY; + return pmd; + } + diff --git a/queue-6.0/mm-correctly-charge-compressed-memory-to-its-memcg.patch b/queue-6.0/mm-correctly-charge-compressed-memory-to-its-memcg.patch new file mode 100644 index 00000000000..fb96ec1a6fa --- /dev/null +++ b/queue-6.0/mm-correctly-charge-compressed-memory-to-its-memcg.patch @@ -0,0 +1,45 @@ +From cd08d80ecdac577bad2e8d6805c7a3859fdefb8d Mon Sep 17 00:00:00 2001 +From: Li Liguang +Date: Mon, 14 Nov 2022 14:48:28 -0500 +Subject: mm: correctly charge compressed memory to its memcg + +From: Li Liguang + +commit cd08d80ecdac577bad2e8d6805c7a3859fdefb8d upstream. 
+ +Kswapd will reclaim memory when memory pressure is high, the anonymous +memory will be compressed and stored in the zpool if zswap is enabled. +The memcg_kmem_bypass() in get_obj_cgroup_from_page() will bypass the +kernel thread and cause the compressed memory not be charged to its memory +cgroup. + +Remove the memcg_kmem_bypass() call and properly charge compressed memory +to its corresponding memory cgroup. + +Link: https://lore.kernel.org/linux-mm/CALvZod4nnn8BHYqAM4xtcR0Ddo2-Wr8uKm9h_CHWUaXw7g_DCg@mail.gmail.com/ +Link: https://lkml.kernel.org/r/20221114194828.100822-1-hannes@cmpxchg.org +Fixes: f4840ccfca25 ("zswap: memcg accounting") +Signed-off-by: Li Liguang +Signed-off-by: Johannes Weiner +Acked-by: Shakeel Butt +Reviewed-by: Muchun Song +Cc: Michal Hocko +Cc: Roman Gushchin +Cc: [5.19+] +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/memcontrol.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -2971,7 +2971,7 @@ struct obj_cgroup *get_obj_cgroup_from_p + { + struct obj_cgroup *objcg; + +- if (!memcg_kmem_enabled() || memcg_kmem_bypass()) ++ if (!memcg_kmem_enabled()) + return NULL; + + if (PageMemcgKmem(page)) { diff --git a/queue-6.0/mm-fix-unexpected-changes-to-failslab-fail_page_alloc-.attr.patch b/queue-6.0/mm-fix-unexpected-changes-to-failslab-fail_page_alloc-.attr.patch new file mode 100644 index 00000000000..56880e19468 --- /dev/null +++ b/queue-6.0/mm-fix-unexpected-changes-to-failslab-fail_page_alloc-.attr.patch @@ -0,0 +1,165 @@ +From ea4452de2ae987342fadbdd2c044034e6480daad Mon Sep 17 00:00:00 2001 +From: Qi Zheng +Date: Fri, 18 Nov 2022 18:00:11 +0800 +Subject: mm: fix unexpected changes to {failslab|fail_page_alloc}.attr + +From: Qi Zheng + +commit ea4452de2ae987342fadbdd2c044034e6480daad upstream. + +When we specify __GFP_NOWARN, we only expect that no warnings will be +issued for current caller. 
But in the __should_failslab() and +__should_fail_alloc_page(), the local GFP flags alter the global +{failslab|fail_page_alloc}.attr, which is persistent and shared by all +tasks. This is not what we expected, let's fix it. + +[akpm@linux-foundation.org: unexport should_fail_ex()] +Link: https://lkml.kernel.org/r/20221118100011.2634-1-zhengqi.arch@bytedance.com +Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN") +Signed-off-by: Qi Zheng +Reported-by: Dmitry Vyukov +Reviewed-by: Akinobu Mita +Reviewed-by: Jason Gunthorpe +Cc: Akinobu Mita +Cc: Matthew Wilcox +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/fault-inject.h | 7 +++++-- + lib/fault-inject.c | 13 ++++++++----- + mm/failslab.c | 12 ++++++++++-- + mm/page_alloc.c | 7 +++++-- + 4 files changed, 28 insertions(+), 11 deletions(-) + +--- a/include/linux/fault-inject.h ++++ b/include/linux/fault-inject.h +@@ -20,7 +20,6 @@ struct fault_attr { + atomic_t space; + unsigned long verbose; + bool task_filter; +- bool no_warn; + unsigned long stacktrace_depth; + unsigned long require_start; + unsigned long require_end; +@@ -32,6 +31,10 @@ struct fault_attr { + struct dentry *dname; + }; + ++enum fault_flags { ++ FAULT_NOWARN = 1 << 0, ++}; ++ + #define FAULT_ATTR_INITIALIZER { \ + .interval = 1, \ + .times = ATOMIC_INIT(1), \ +@@ -40,11 +43,11 @@ struct fault_attr { + .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \ + .verbose = 2, \ + .dname = NULL, \ +- .no_warn = false, \ + } + + #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER + int setup_fault_attr(struct fault_attr *attr, char *str); ++bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags); + bool should_fail(struct fault_attr *attr, ssize_t size); + + #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS +--- a/lib/fault-inject.c ++++ b/lib/fault-inject.c +@@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr); + + static void fail_dump(struct fault_attr *attr) + { +- if 
(attr->no_warn) +- return; +- + if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) { + printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n" + "name %pd, interval %lu, probability %lu, " +@@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struc + * http://www.nongnu.org/failmalloc/ + */ + +-bool should_fail(struct fault_attr *attr, ssize_t size) ++bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) + { + if (in_task()) { + unsigned int fail_nth = READ_ONCE(current->fail_nth); +@@ -146,13 +143,19 @@ bool should_fail(struct fault_attr *attr + return false; + + fail: +- fail_dump(attr); ++ if (!(flags & FAULT_NOWARN)) ++ fail_dump(attr); + + if (atomic_read(&attr->times) != -1) + atomic_dec_not_zero(&attr->times); + + return true; + } ++ ++bool should_fail(struct fault_attr *attr, ssize_t size) ++{ ++ return should_fail_ex(attr, size, 0); ++} + EXPORT_SYMBOL_GPL(should_fail); + + #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS +--- a/mm/failslab.c ++++ b/mm/failslab.c +@@ -16,6 +16,8 @@ static struct { + + bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) + { ++ int flags = 0; ++ + /* No fault-injection for bootstrap cache */ + if (unlikely(s == kmem_cache)) + return false; +@@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache + if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB)) + return false; + ++ /* ++ * In some cases, it expects to specify __GFP_NOWARN ++ * to avoid printing any information(not just a warning), ++ * thus avoiding deadlocks. See commit 6b9dbedbe349 for ++ * details. 
++ */ + if (gfpflags & __GFP_NOWARN) +- failslab.attr.no_warn = true; ++ flags |= FAULT_NOWARN; + +- return should_fail(&failslab.attr, s->object_size); ++ return should_fail_ex(&failslab.attr, s->object_size, flags); + } + + static int __init setup_failslab(char *str) +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -3883,6 +3883,8 @@ __setup("fail_page_alloc=", setup_fail_p + + static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) + { ++ int flags = 0; ++ + if (order < fail_page_alloc.min_order) + return false; + if (gfp_mask & __GFP_NOFAIL) +@@ -3893,10 +3895,11 @@ static bool __should_fail_alloc_page(gfp + (gfp_mask & __GFP_DIRECT_RECLAIM)) + return false; + ++ /* See comment in __should_failslab() */ + if (gfp_mask & __GFP_NOWARN) +- fail_page_alloc.attr.no_warn = true; ++ flags |= FAULT_NOWARN; + +- return should_fail(&fail_page_alloc.attr, 1 << order); ++ return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags); + } + + #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/queue-6.0/series b/queue-6.0/series index 28a92ec2bac..282fb134ceb 100644 --- a/queue-6.0/series +++ b/queue-6.0/series @@ -210,3 +210,10 @@ kvm-x86-nsvm-harden-svm_free_nested-against-freeing-vmcb02-while-still-in-use.pa kvm-x86-add-kvm_leave_nested.patch kvm-x86-remove-exit_int_info-warning-in-svm_handle_exit.patch kvm-update-gfn_to_pfn_cache-khva-when-it-moves-within-the-same-page.patch +x86-tsx-add-a-feature-bit-for-tsx-control-msr-support.patch +x86-pm-add-enumeration-check-before-spec-msrs-save-restore-setup.patch +x86-ioremap-fix-page-aligned-size-calculation-in-__ioremap_caller.patch +mm-fix-unexpected-changes-to-failslab-fail_page_alloc-.attr.patch +mm-correctly-charge-compressed-memory-to-its-memcg.patch +loongarch-clear-fpu-simd-thread-info-flags-for-kernel-thread.patch +loongarch-set-_page_dirty-only-if-_page_write-is-set-in-pmd-pte-_mkdirty.patch diff --git a/queue-6.0/x86-ioremap-fix-page-aligned-size-calculation-in-__ioremap_caller.patch 
b/queue-6.0/x86-ioremap-fix-page-aligned-size-calculation-in-__ioremap_caller.patch new file mode 100644 index 00000000000..d1f64ba2ae3 --- /dev/null +++ b/queue-6.0/x86-ioremap-fix-page-aligned-size-calculation-in-__ioremap_caller.patch @@ -0,0 +1,49 @@ +From 4dbd6a3e90e03130973688fd79e19425f720d999 Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Wed, 16 Nov 2022 10:41:24 -0800 +Subject: x86/ioremap: Fix page aligned size calculation in __ioremap_caller() + +From: Michael Kelley + +commit 4dbd6a3e90e03130973688fd79e19425f720d999 upstream. + +Current code re-calculates the size after aligning the starting and +ending physical addresses on a page boundary. But the re-calculation +also embeds the masking of high order bits that exceed the size of +the physical address space (via PHYSICAL_PAGE_MASK). If the masking +removes any high order bits, the size calculation results in a huge +value that is likely to immediately fail. + +Fix this by re-calculating the page-aligned size first. Then mask any +high order bits using PHYSICAL_PAGE_MASK. + +Fixes: ffa71f33a820 ("x86, ioremap: Fix incorrect physical address handling in PAE mode") +Signed-off-by: Michael Kelley +Signed-off-by: Borislav Petkov +Acked-by: Dave Hansen +Cc: +Link: https://lore.kernel.org/r/1668624097-14884-2-git-send-email-mikelley@microsoft.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/mm/ioremap.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/arch/x86/mm/ioremap.c ++++ b/arch/x86/mm/ioremap.c +@@ -216,9 +216,15 @@ __ioremap_caller(resource_size_t phys_ad + * Mappings have to be page-aligned + */ + offset = phys_addr & ~PAGE_MASK; +- phys_addr &= PHYSICAL_PAGE_MASK; ++ phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr+1) - phys_addr; + ++ /* ++ * Mask out any bits not part of the actual physical ++ * address, like memory encryption bits. 
++ */ ++ phys_addr &= PHYSICAL_PAGE_MASK; ++ + retval = memtype_reserve(phys_addr, (u64)phys_addr + size, + pcm, &new_pcm); + if (retval) { diff --git a/queue-6.0/x86-pm-add-enumeration-check-before-spec-msrs-save-restore-setup.patch b/queue-6.0/x86-pm-add-enumeration-check-before-spec-msrs-save-restore-setup.patch new file mode 100644 index 00000000000..0bf10c05714 --- /dev/null +++ b/queue-6.0/x86-pm-add-enumeration-check-before-spec-msrs-save-restore-setup.patch @@ -0,0 +1,98 @@ +From 50bcceb7724e471d9b591803889df45dcbb584bc Mon Sep 17 00:00:00 2001 +From: Pawan Gupta +Date: Tue, 15 Nov 2022 11:17:06 -0800 +Subject: x86/pm: Add enumeration check before spec MSRs save/restore setup + +From: Pawan Gupta + +commit 50bcceb7724e471d9b591803889df45dcbb584bc upstream. + +pm_save_spec_msr() keeps a list of all the MSRs which _might_ need +to be saved and restored at hibernate and resume. However, it has +zero awareness of CPU support for these MSRs. It mostly works by +unconditionally attempting to manipulate these MSRs and relying on +rdmsrl_safe() being able to handle a #GP on CPUs where the support is +unavailable. + +However, it's possible for reads (RDMSR) to be supported for a given MSR +while writes (WRMSR) are not. In this case, msr_build_context() sees +a successful read (RDMSR) and marks the MSR as valid. Then, later, a +write (WRMSR) fails, producing a nasty (but harmless) error message. +This causes restore_processor_state() to try and restore it, but writing +this MSR is not allowed on the Intel Atom N2600 leading to: + + unchecked MSR access error: WRMSR to 0x122 (tried to write 0x0000000000000002) \ + at rIP: 0xffffffff8b07a574 (native_write_msr+0x4/0x20) + Call Trace: + + restore_processor_state + x86_acpi_suspend_lowlevel + acpi_suspend_enter + suspend_devices_and_enter + pm_suspend.cold + state_store + kernfs_fop_write_iter + vfs_write + ksys_write + do_syscall_64 + ? do_syscall_64 + ? up_read + ? lock_is_held_type + ? asm_exc_page_fault + ? 
lockdep_hardirqs_on + entry_SYSCALL_64_after_hwframe + +To fix this, add the corresponding X86_FEATURE bit for each MSR. Avoid +trying to manipulate the MSR when the feature bit is clear. This +required adding a X86_FEATURE bit for MSRs that do not have one already, +but it's a small price to pay. + + [ bp: Move struct msr_enumeration inside the only function that uses it. ] + +Fixes: 73924ec4d560 ("x86/pm: Save the MSR validity status at context setup") +Reported-by: Hans de Goede +Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov +Reviewed-by: Dave Hansen +Acked-by: Rafael J. Wysocki +Cc: +Link: https://lore.kernel.org/r/c24db75d69df6e66c0465e13676ad3f2837a2ed8.1668539735.git.pawan.kumar.gupta@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/power/cpu.c | 23 +++++++++++++++-------- + 1 file changed, 15 insertions(+), 8 deletions(-) + +--- a/arch/x86/power/cpu.c ++++ b/arch/x86/power/cpu.c +@@ -513,16 +513,23 @@ static int pm_cpu_check(const struct x86 + + static void pm_save_spec_msr(void) + { +- u32 spec_msr_id[] = { +- MSR_IA32_SPEC_CTRL, +- MSR_IA32_TSX_CTRL, +- MSR_TSX_FORCE_ABORT, +- MSR_IA32_MCU_OPT_CTRL, +- MSR_AMD64_LS_CFG, +- MSR_AMD64_DE_CFG, ++ struct msr_enumeration { ++ u32 msr_no; ++ u32 feature; ++ } msr_enum[] = { ++ { MSR_IA32_SPEC_CTRL, X86_FEATURE_MSR_SPEC_CTRL }, ++ { MSR_IA32_TSX_CTRL, X86_FEATURE_MSR_TSX_CTRL }, ++ { MSR_TSX_FORCE_ABORT, X86_FEATURE_TSX_FORCE_ABORT }, ++ { MSR_IA32_MCU_OPT_CTRL, X86_FEATURE_SRBDS_CTRL }, ++ { MSR_AMD64_LS_CFG, X86_FEATURE_LS_CFG_SSBD }, ++ { MSR_AMD64_DE_CFG, X86_FEATURE_LFENCE_RDTSC }, + }; ++ int i; + +- msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id)); ++ for (i = 0; i < ARRAY_SIZE(msr_enum); i++) { ++ if (boot_cpu_has(msr_enum[i].feature)) ++ msr_build_context(&msr_enum[i].msr_no, 1); ++ } + } + + static int pm_check_save_msr(void) diff --git a/queue-6.0/x86-tsx-add-a-feature-bit-for-tsx-control-msr-support.patch 
b/queue-6.0/x86-tsx-add-a-feature-bit-for-tsx-control-msr-support.patch new file mode 100644 index 00000000000..702193df0a7 --- /dev/null +++ b/queue-6.0/x86-tsx-add-a-feature-bit-for-tsx-control-msr-support.patch @@ -0,0 +1,115 @@ +From aaa65d17eec372c6a9756833f3964ba05b05ea14 Mon Sep 17 00:00:00 2001 +From: Pawan Gupta +Date: Tue, 15 Nov 2022 11:17:05 -0800 +Subject: x86/tsx: Add a feature bit for TSX control MSR support + +From: Pawan Gupta + +commit aaa65d17eec372c6a9756833f3964ba05b05ea14 upstream. + +Support for the TSX control MSR is enumerated in MSR_IA32_ARCH_CAPABILITIES. +This is different from how other CPU features are enumerated i.e. via +CPUID. Currently, a call to tsx_ctrl_is_supported() is required for +enumerating the feature. In the absence of a feature bit for TSX control, +any code that relies on checking feature bits directly will not work. + +In preparation for adding a feature bit check in MSR save/restore +during suspend/resume, set a new feature bit X86_FEATURE_TSX_CTRL when +MSR_IA32_TSX_CTRL is present. Also make tsx_ctrl_is_supported() use the +new feature bit to avoid any overhead of reading the MSR. + + [ bp: Remove tsx_ctrl_is_supported(), add room for two more feature + bits in word 11 which are coming up in the next merge window. 
] + +Suggested-by: Andrew Cooper +Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov +Reviewed-by: Dave Hansen +Cc: +Link: https://lore.kernel.org/r/de619764e1d98afbb7a5fa58424f1278ede37b45.1668539735.git.pawan.kumar.gupta@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/cpufeatures.h | 3 ++ + arch/x86/kernel/cpu/tsx.c | 38 ++++++++++++++++--------------------- + 2 files changed, 20 insertions(+), 21 deletions(-) + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -305,6 +305,9 @@ + #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ + #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ + ++ ++#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ ++ + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ + #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ + #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ +--- a/arch/x86/kernel/cpu/tsx.c ++++ b/arch/x86/kernel/cpu/tsx.c +@@ -58,24 +58,6 @@ static void tsx_enable(void) + wrmsrl(MSR_IA32_TSX_CTRL, tsx); + } + +-static bool tsx_ctrl_is_supported(void) +-{ +- u64 ia32_cap = x86_read_arch_cap_msr(); +- +- /* +- * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this +- * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. +- * +- * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a +- * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES +- * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get +- * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, +- * tsx= cmdline requests will do nothing on CPUs without +- * MSR_IA32_TSX_CTRL support. 
+- */ +- return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR); +-} +- + static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) + { + if (boot_cpu_has_bug(X86_BUG_TAA)) +@@ -135,7 +117,7 @@ static void tsx_clear_cpuid(void) + rdmsrl(MSR_TSX_FORCE_ABORT, msr); + msr |= MSR_TFA_TSX_CPUID_CLEAR; + wrmsrl(MSR_TSX_FORCE_ABORT, msr); +- } else if (tsx_ctrl_is_supported()) { ++ } else if (cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL)) { + rdmsrl(MSR_IA32_TSX_CTRL, msr); + msr |= TSX_CTRL_CPUID_CLEAR; + wrmsrl(MSR_IA32_TSX_CTRL, msr); +@@ -158,7 +140,8 @@ static void tsx_dev_mode_disable(void) + u64 mcu_opt_ctrl; + + /* Check if RTM_ALLOW exists */ +- if (!boot_cpu_has_bug(X86_BUG_TAA) || !tsx_ctrl_is_supported() || ++ if (!boot_cpu_has_bug(X86_BUG_TAA) || ++ !cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL) || + !cpu_feature_enabled(X86_FEATURE_SRBDS_CTRL)) + return; + +@@ -191,7 +174,20 @@ void __init tsx_init(void) + return; + } + +- if (!tsx_ctrl_is_supported()) { ++ /* ++ * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this ++ * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. ++ * ++ * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a ++ * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES ++ * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get ++ * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, ++ * tsx= cmdline requests will do nothing on CPUs without ++ * MSR_IA32_TSX_CTRL support. ++ */ ++ if (x86_read_arch_cap_msr() & ARCH_CAP_TSX_CTRL_MSR) { ++ setup_force_cpu_cap(X86_FEATURE_MSR_TSX_CTRL); ++ } else { + tsx_ctrl_state = TSX_CTRL_NOT_SUPPORTED; + return; + } -- 2.47.3