--- /dev/null
+From e428e9613531d1ef6bd0d91352899712b29134fb Mon Sep 17 00:00:00 2001
+From: Huacai Chen <chenhuacai@loongson.cn>
+Date: Mon, 21 Nov 2022 19:02:57 +0800
+Subject: LoongArch: Clear FPU/SIMD thread info flags for kernel thread
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+commit e428e9613531d1ef6bd0d91352899712b29134fb upstream.
+
+If a kernel thread is created by a user thread, it may carry FPU/SIMD
+thread info flags (TIF_USEDFPU, TIF_USEDSIMD, etc.). It will then be
+considered an FPU owner, and the kernel will try to save its FPU/SIMD
+context, causing errors such as:
+
+[ 41.518931] do_fpu invoked from kernel context![#1]:
+[ 41.523933] CPU: 1 PID: 395 Comm: iou-wrk-394 Not tainted 6.1.0-rc5+ #217
+[ 41.530757] Hardware name: Loongson Loongson-3A5000-7A1000-1w-CRB/Loongson-LS3A5000-7A1000-1w-CRB, BIOS vUDK2018-LoongArch-V2.0.pre-beta8 08/18/2022
+[ 41.544064] $ 0 : 0000000000000000 90000000011e9468 9000000106c7c000 9000000106c7fcf0
+[ 41.552101] $ 4 : 9000000106305d40 9000000106689800 9000000106c7fd08 0000000003995818
+[ 41.560138] $ 8 : 0000000000000001 90000000009a72e4 0000000000000020 fffffffffffffffc
+[ 41.568174] $12 : 0000000000000000 0000000000000000 0000000000000020 00000009aab7e130
+[ 41.576211] $16 : 00000000000001ff 0000000000000407 0000000000000001 0000000000000000
+[ 41.584247] $20 : 0000000000000000 0000000000000001 9000000106c7fd70 90000001002f0400
+[ 41.592284] $24 : 0000000000000000 900000000178f740 90000000011e9834 90000001063057c0
+[ 41.600320] $28 : 0000000000000000 0000000000000001 9000000006826b40 9000000106305140
+[ 41.608356] era : 9000000000228848 _save_fp+0x0/0xd8
+[ 41.613542] ra : 90000000011e9468 __schedule+0x568/0x8d0
+[ 41.619160] CSR crmd: 000000b0
+[ 41.619163] CSR prmd: 00000000
+[ 41.622359] CSR euen: 00000000
+[ 41.625558] CSR ecfg: 00071c1c
+[ 41.628756] CSR estat: 000f0000
+[ 41.635239] ExcCode : f (SubCode 0)
+[ 41.638783] PrId : 0014c010 (Loongson-64bit)
+[ 41.643191] Modules linked in: acpi_ipmi vfat fat ipmi_si ipmi_devintf cfg80211 ipmi_msghandler rfkill fuse efivarfs
+[ 41.653734] Process iou-wrk-394 (pid: 395, threadinfo=0000000004ebe913, task=00000000636fa1be)
+[ 41.662375] Stack : 00000000ffff0875 9000000006800ec0 9000000006800ec0 90000000002d57e0
+[ 41.670412] 0000000000000001 0000000000000000 9000000106535880 0000000000000001
+[ 41.678450] 9000000105291800 0000000000000000 9000000105291838 900000000178e000
+[ 41.686487] 9000000106c7fd90 9000000106305140 0000000000000001 90000000011e9834
+[ 41.694523] 00000000ffff0875 90000000011f034c 9000000105291838 9000000105291830
+[ 41.702561] 0000000000000000 9000000006801440 00000000ffff0875 90000000002d48c0
+[ 41.710597] 9000000128800001 9000000106305140 9000000105291838 9000000105291838
+[ 41.718634] 9000000105291830 9000000107811740 9000000105291848 90000000009bf1e0
+[ 41.726672] 9000000105291830 9000000107811748 2d6b72772d756f69 0000000000343933
+[ 41.734708] 0000000000000000 0000000000000000 0000000000000000 0000000000000000
+[ 41.742745] ...
+[ 41.745252] Call Trace:
+[ 42.197868] [<9000000000228848>] _save_fp+0x0/0xd8
+[ 42.205214] [<90000000011ed468>] __schedule+0x568/0x8d0
+[ 42.210485] [<90000000011ed834>] schedule+0x64/0xd4
+[ 42.215411] [<90000000011f434c>] schedule_timeout+0x88/0x188
+[ 42.221115] [<90000000009c36d0>] io_wqe_worker+0x184/0x350
+[ 42.226645] [<9000000000221cf0>] ret_from_kernel_thread+0xc/0x9c
+
+This can be easily triggered by ltp testcase syscalls/io_uring02 and it
+can also be easily fixed by clearing the FPU/SIMD thread info flags for
+kernel threads in copy_thread().
+
+Cc: stable@vger.kernel.org
+Reported-by: Qi Hu <huqi@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kernel/process.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/arch/loongarch/kernel/process.c
++++ b/arch/loongarch/kernel/process.c
+@@ -152,7 +152,7 @@ int copy_thread(struct task_struct *p, c
+ childregs->csr_crmd = p->thread.csr_crmd;
+ childregs->csr_prmd = p->thread.csr_prmd;
+ childregs->csr_ecfg = p->thread.csr_ecfg;
+- return 0;
++ goto out;
+ }
+
+ /* user thread */
+@@ -171,14 +171,15 @@ int copy_thread(struct task_struct *p, c
+ */
+ childregs->csr_euen = 0;
+
++ if (clone_flags & CLONE_SETTLS)
++ childregs->regs[2] = tls;
++
++out:
+ clear_tsk_thread_flag(p, TIF_USEDFPU);
+ clear_tsk_thread_flag(p, TIF_USEDSIMD);
+ clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE);
+ clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE);
+
+- if (clone_flags & CLONE_SETTLS)
+- childregs->regs[2] = tls;
+-
+ return 0;
+ }
+
--- /dev/null
+From bf2f34a506e66e2979de6b17c337c5d4b25b4d2c Mon Sep 17 00:00:00 2001
+From: Huacai Chen <chenhuacai@loongson.cn>
+Date: Mon, 21 Nov 2022 19:02:57 +0800
+Subject: LoongArch: Set _PAGE_DIRTY only if _PAGE_WRITE is set in {pmd,pte}_mkdirty()
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+commit bf2f34a506e66e2979de6b17c337c5d4b25b4d2c upstream.
+
+Currently {pmd,pte}_mkdirty() set the _PAGE_DIRTY bit unconditionally, which
+causes random segmentation faults after commit 0ccf7f168e17bb7e ("mm/thp:
+carry over dirty bit when thp splits on pmd").
+
+The reason is: on fork(), the parent process uses pmd_wrprotect() to clear
+the huge page's _PAGE_WRITE and _PAGE_DIRTY (for COW); then pte_mkdirty()
+sets _PAGE_DIRTY as well as _PAGE_MODIFIED while splitting dirty huge pages;
+once _PAGE_DIRTY is set, there is no TLB modify exception, so the COW
+mechanism fails; and in the end memory corruption occurs between the parent
+and child processes.
+
+So, we should set _PAGE_DIRTY only when _PAGE_WRITE is set in {pmd,pte}_
+mkdirty().
+
+Cc: stable@vger.kernel.org
+Cc: Peter Xu <peterx@redhat.com>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/include/asm/pgtable.h | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/loongarch/include/asm/pgtable.h
++++ b/arch/loongarch/include/asm/pgtable.h
+@@ -349,7 +349,9 @@ static inline pte_t pte_mkclean(pte_t pt
+
+ static inline pte_t pte_mkdirty(pte_t pte)
+ {
+- pte_val(pte) |= (_PAGE_DIRTY | _PAGE_MODIFIED);
++ pte_val(pte) |= _PAGE_MODIFIED;
++ if (pte_val(pte) & _PAGE_WRITE)
++ pte_val(pte) |= _PAGE_DIRTY;
+ return pte;
+ }
+
+@@ -475,7 +477,9 @@ static inline pmd_t pmd_mkclean(pmd_t pm
+
+ static inline pmd_t pmd_mkdirty(pmd_t pmd)
+ {
+- pmd_val(pmd) |= (_PAGE_DIRTY | _PAGE_MODIFIED);
++ pmd_val(pmd) |= _PAGE_MODIFIED;
++ if (pmd_val(pmd) & _PAGE_WRITE)
++ pmd_val(pmd) |= _PAGE_DIRTY;
+ return pmd;
+ }
+
--- /dev/null
+From cd08d80ecdac577bad2e8d6805c7a3859fdefb8d Mon Sep 17 00:00:00 2001
+From: Li Liguang <liliguang@baidu.com>
+Date: Mon, 14 Nov 2022 14:48:28 -0500
+Subject: mm: correctly charge compressed memory to its memcg
+
+From: Li Liguang <liliguang@baidu.com>
+
+commit cd08d80ecdac577bad2e8d6805c7a3859fdefb8d upstream.
+
+Kswapd will reclaim memory when memory pressure is high; the anonymous
+memory will be compressed and stored in the zpool if zswap is enabled.
+The memcg_kmem_bypass() in get_obj_cgroup_from_page() will bypass the
+kernel thread and cause the compressed memory not to be charged to its
+memory cgroup.
+
+Remove the memcg_kmem_bypass() call and properly charge compressed memory
+to its corresponding memory cgroup.
+
+Link: https://lore.kernel.org/linux-mm/CALvZod4nnn8BHYqAM4xtcR0Ddo2-Wr8uKm9h_CHWUaXw7g_DCg@mail.gmail.com/
+Link: https://lkml.kernel.org/r/20221114194828.100822-1-hannes@cmpxchg.org
+Fixes: f4840ccfca25 ("zswap: memcg accounting")
+Signed-off-by: Li Liguang <liliguang@baidu.com>
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Shakeel Butt <shakeelb@google.com>
+Reviewed-by: Muchun Song <songmuchun@bytedance.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Roman Gushchin <roman.gushchin@linux.dev>
+Cc: <stable@vger.kernel.org> [5.19+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memcontrol.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -2971,7 +2971,7 @@ struct obj_cgroup *get_obj_cgroup_from_p
+ {
+ struct obj_cgroup *objcg;
+
+- if (!memcg_kmem_enabled() || memcg_kmem_bypass())
++ if (!memcg_kmem_enabled())
+ return NULL;
+
+ if (PageMemcgKmem(page)) {
--- /dev/null
+From ea4452de2ae987342fadbdd2c044034e6480daad Mon Sep 17 00:00:00 2001
+From: Qi Zheng <zhengqi.arch@bytedance.com>
+Date: Fri, 18 Nov 2022 18:00:11 +0800
+Subject: mm: fix unexpected changes to {failslab|fail_page_alloc}.attr
+
+From: Qi Zheng <zhengqi.arch@bytedance.com>
+
+commit ea4452de2ae987342fadbdd2c044034e6480daad upstream.
+
+When we specify __GFP_NOWARN, we only expect that no warnings will be
+issued for current caller. But in the __should_failslab() and
+__should_fail_alloc_page(), the local GFP flags alter the global
+{failslab|fail_page_alloc}.attr, which is persistent and shared by all
+tasks. This is not what we expected, let's fix it.
+
+[akpm@linux-foundation.org: unexport should_fail_ex()]
+Link: https://lkml.kernel.org/r/20221118100011.2634-1-zhengqi.arch@bytedance.com
+Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
+Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Reviewed-by: Akinobu Mita <akinobu.mita@gmail.com>
+Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
+Cc: Akinobu Mita <akinobu.mita@gmail.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/fault-inject.h | 7 +++++--
+ lib/fault-inject.c | 13 ++++++++-----
+ mm/failslab.c | 12 ++++++++++--
+ mm/page_alloc.c | 7 +++++--
+ 4 files changed, 28 insertions(+), 11 deletions(-)
+
+--- a/include/linux/fault-inject.h
++++ b/include/linux/fault-inject.h
+@@ -20,7 +20,6 @@ struct fault_attr {
+ atomic_t space;
+ unsigned long verbose;
+ bool task_filter;
+- bool no_warn;
+ unsigned long stacktrace_depth;
+ unsigned long require_start;
+ unsigned long require_end;
+@@ -32,6 +31,10 @@ struct fault_attr {
+ struct dentry *dname;
+ };
+
++enum fault_flags {
++ FAULT_NOWARN = 1 << 0,
++};
++
+ #define FAULT_ATTR_INITIALIZER { \
+ .interval = 1, \
+ .times = ATOMIC_INIT(1), \
+@@ -40,11 +43,11 @@ struct fault_attr {
+ .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \
+ .verbose = 2, \
+ .dname = NULL, \
+- .no_warn = false, \
+ }
+
+ #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
+ int setup_fault_attr(struct fault_attr *attr, char *str);
++bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags);
+ bool should_fail(struct fault_attr *attr, ssize_t size);
+
+ #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+--- a/lib/fault-inject.c
++++ b/lib/fault-inject.c
+@@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
+
+ static void fail_dump(struct fault_attr *attr)
+ {
+- if (attr->no_warn)
+- return;
+-
+ if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
+ printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
+ "name %pd, interval %lu, probability %lu, "
+@@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struc
+ * http://www.nongnu.org/failmalloc/
+ */
+
+-bool should_fail(struct fault_attr *attr, ssize_t size)
++bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
+ {
+ if (in_task()) {
+ unsigned int fail_nth = READ_ONCE(current->fail_nth);
+@@ -146,13 +143,19 @@ bool should_fail(struct fault_attr *attr
+ return false;
+
+ fail:
+- fail_dump(attr);
++ if (!(flags & FAULT_NOWARN))
++ fail_dump(attr);
+
+ if (atomic_read(&attr->times) != -1)
+ atomic_dec_not_zero(&attr->times);
+
+ return true;
+ }
++
++bool should_fail(struct fault_attr *attr, ssize_t size)
++{
++ return should_fail_ex(attr, size, 0);
++}
+ EXPORT_SYMBOL_GPL(should_fail);
+
+ #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+--- a/mm/failslab.c
++++ b/mm/failslab.c
+@@ -16,6 +16,8 @@ static struct {
+
+ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
+ {
++ int flags = 0;
++
+ /* No fault-injection for bootstrap cache */
+ if (unlikely(s == kmem_cache))
+ return false;
+@@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache
+ if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
+ return false;
+
++ /*
++ * In some cases, it expects to specify __GFP_NOWARN
++ * to avoid printing any information(not just a warning),
++ * thus avoiding deadlocks. See commit 6b9dbedbe349 for
++ * details.
++ */
+ if (gfpflags & __GFP_NOWARN)
+- failslab.attr.no_warn = true;
++ flags |= FAULT_NOWARN;
+
+- return should_fail(&failslab.attr, s->object_size);
++ return should_fail_ex(&failslab.attr, s->object_size, flags);
+ }
+
+ static int __init setup_failslab(char *str)
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3883,6 +3883,8 @@ __setup("fail_page_alloc=", setup_fail_p
+
+ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
+ {
++ int flags = 0;
++
+ if (order < fail_page_alloc.min_order)
+ return false;
+ if (gfp_mask & __GFP_NOFAIL)
+@@ -3893,10 +3895,11 @@ static bool __should_fail_alloc_page(gfp
+ (gfp_mask & __GFP_DIRECT_RECLAIM))
+ return false;
+
++ /* See comment in __should_failslab() */
+ if (gfp_mask & __GFP_NOWARN)
+- fail_page_alloc.attr.no_warn = true;
++ flags |= FAULT_NOWARN;
+
+- return should_fail(&fail_page_alloc.attr, 1 << order);
++ return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags);
+ }
+
+ #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
kvm-x86-add-kvm_leave_nested.patch
kvm-x86-remove-exit_int_info-warning-in-svm_handle_exit.patch
kvm-update-gfn_to_pfn_cache-khva-when-it-moves-within-the-same-page.patch
+x86-tsx-add-a-feature-bit-for-tsx-control-msr-support.patch
+x86-pm-add-enumeration-check-before-spec-msrs-save-restore-setup.patch
+x86-ioremap-fix-page-aligned-size-calculation-in-__ioremap_caller.patch
+mm-fix-unexpected-changes-to-failslab-fail_page_alloc-.attr.patch
+mm-correctly-charge-compressed-memory-to-its-memcg.patch
+loongarch-clear-fpu-simd-thread-info-flags-for-kernel-thread.patch
+loongarch-set-_page_dirty-only-if-_page_write-is-set-in-pmd-pte-_mkdirty.patch
--- /dev/null
+From 4dbd6a3e90e03130973688fd79e19425f720d999 Mon Sep 17 00:00:00 2001
+From: Michael Kelley <mikelley@microsoft.com>
+Date: Wed, 16 Nov 2022 10:41:24 -0800
+Subject: x86/ioremap: Fix page aligned size calculation in __ioremap_caller()
+
+From: Michael Kelley <mikelley@microsoft.com>
+
+commit 4dbd6a3e90e03130973688fd79e19425f720d999 upstream.
+
+Current code re-calculates the size after aligning the starting and
+ending physical addresses on a page boundary. But the re-calculation
+also embeds the masking of high order bits that exceed the size of
+the physical address space (via PHYSICAL_PAGE_MASK). If the masking
+removes any high order bits, the size calculation results in a huge
+value that is likely to immediately fail.
+
+Fix this by re-calculating the page-aligned size first. Then mask any
+high order bits using PHYSICAL_PAGE_MASK.
+
+Fixes: ffa71f33a820 ("x86, ioremap: Fix incorrect physical address handling in PAE mode")
+Signed-off-by: Michael Kelley <mikelley@microsoft.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: <stable@kernel.org>
+Link: https://lore.kernel.org/r/1668624097-14884-2-git-send-email-mikelley@microsoft.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/ioremap.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/mm/ioremap.c
++++ b/arch/x86/mm/ioremap.c
+@@ -216,9 +216,15 @@ __ioremap_caller(resource_size_t phys_ad
+ * Mappings have to be page-aligned
+ */
+ offset = phys_addr & ~PAGE_MASK;
+- phys_addr &= PHYSICAL_PAGE_MASK;
++ phys_addr &= PAGE_MASK;
+ size = PAGE_ALIGN(last_addr+1) - phys_addr;
+
++ /*
++ * Mask out any bits not part of the actual physical
++ * address, like memory encryption bits.
++ */
++ phys_addr &= PHYSICAL_PAGE_MASK;
++
+ retval = memtype_reserve(phys_addr, (u64)phys_addr + size,
+ pcm, &new_pcm);
+ if (retval) {
--- /dev/null
+From 50bcceb7724e471d9b591803889df45dcbb584bc Mon Sep 17 00:00:00 2001
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Tue, 15 Nov 2022 11:17:06 -0800
+Subject: x86/pm: Add enumeration check before spec MSRs save/restore setup
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 50bcceb7724e471d9b591803889df45dcbb584bc upstream.
+
+pm_save_spec_msr() keeps a list of all the MSRs which _might_ need
+to be saved and restored at hibernate and resume. However, it has
+zero awareness of CPU support for these MSRs. It mostly works by
+unconditionally attempting to manipulate these MSRs and relying on
+rdmsrl_safe() being able to handle a #GP on CPUs where the support is
+unavailable.
+
+However, it's possible for reads (RDMSR) to be supported for a given MSR
+while writes (WRMSR) are not. In this case, msr_build_context() sees
+a successful read (RDMSR) and marks the MSR as valid. This causes
+restore_processor_state() to try and restore it later, but the write
+(WRMSR) fails, producing a nasty (but harmless) error message. For example,
+writing this MSR is not allowed on the Intel Atom N2600, leading to:
+
+ unchecked MSR access error: WRMSR to 0x122 (tried to write 0x0000000000000002) \
+ at rIP: 0xffffffff8b07a574 (native_write_msr+0x4/0x20)
+ Call Trace:
+ <TASK>
+ restore_processor_state
+ x86_acpi_suspend_lowlevel
+ acpi_suspend_enter
+ suspend_devices_and_enter
+ pm_suspend.cold
+ state_store
+ kernfs_fop_write_iter
+ vfs_write
+ ksys_write
+ do_syscall_64
+ ? do_syscall_64
+ ? up_read
+ ? lock_is_held_type
+ ? asm_exc_page_fault
+ ? lockdep_hardirqs_on
+ entry_SYSCALL_64_after_hwframe
+
+To fix this, add the corresponding X86_FEATURE bit for each MSR. Avoid
+trying to manipulate the MSR when the feature bit is clear. This
+required adding a X86_FEATURE bit for MSRs that do not have one already,
+but it's a small price to pay.
+
+ [ bp: Move struct msr_enumeration inside the only function that uses it. ]
+
+Fixes: 73924ec4d560 ("x86/pm: Save the MSR validity status at context setup")
+Reported-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
+Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: <stable@kernel.org>
+Link: https://lore.kernel.org/r/c24db75d69df6e66c0465e13676ad3f2837a2ed8.1668539735.git.pawan.kumar.gupta@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/power/cpu.c | 23 +++++++++++++++--------
+ 1 file changed, 15 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/power/cpu.c
++++ b/arch/x86/power/cpu.c
+@@ -513,16 +513,23 @@ static int pm_cpu_check(const struct x86
+
+ static void pm_save_spec_msr(void)
+ {
+- u32 spec_msr_id[] = {
+- MSR_IA32_SPEC_CTRL,
+- MSR_IA32_TSX_CTRL,
+- MSR_TSX_FORCE_ABORT,
+- MSR_IA32_MCU_OPT_CTRL,
+- MSR_AMD64_LS_CFG,
+- MSR_AMD64_DE_CFG,
++ struct msr_enumeration {
++ u32 msr_no;
++ u32 feature;
++ } msr_enum[] = {
++ { MSR_IA32_SPEC_CTRL, X86_FEATURE_MSR_SPEC_CTRL },
++ { MSR_IA32_TSX_CTRL, X86_FEATURE_MSR_TSX_CTRL },
++ { MSR_TSX_FORCE_ABORT, X86_FEATURE_TSX_FORCE_ABORT },
++ { MSR_IA32_MCU_OPT_CTRL, X86_FEATURE_SRBDS_CTRL },
++ { MSR_AMD64_LS_CFG, X86_FEATURE_LS_CFG_SSBD },
++ { MSR_AMD64_DE_CFG, X86_FEATURE_LFENCE_RDTSC },
+ };
++ int i;
+
+- msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id));
++ for (i = 0; i < ARRAY_SIZE(msr_enum); i++) {
++ if (boot_cpu_has(msr_enum[i].feature))
++ msr_build_context(&msr_enum[i].msr_no, 1);
++ }
+ }
+
+ static int pm_check_save_msr(void)
--- /dev/null
+From aaa65d17eec372c6a9756833f3964ba05b05ea14 Mon Sep 17 00:00:00 2001
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Tue, 15 Nov 2022 11:17:05 -0800
+Subject: x86/tsx: Add a feature bit for TSX control MSR support
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit aaa65d17eec372c6a9756833f3964ba05b05ea14 upstream.
+
+Support for the TSX control MSR is enumerated in MSR_IA32_ARCH_CAPABILITIES.
+This is different from how other CPU features are enumerated i.e. via
+CPUID. Currently, a call to tsx_ctrl_is_supported() is required for
+enumerating the feature. In the absence of a feature bit for TSX control,
+any code that relies on checking feature bits directly will not work.
+
+In preparation for adding a feature bit check in MSR save/restore
+during suspend/resume, set a new feature bit X86_FEATURE_MSR_TSX_CTRL when
+MSR_IA32_TSX_CTRL is present. Also make tsx_ctrl_is_supported() use the
+new feature bit to avoid any overhead of reading the MSR.
+
+ [ bp: Remove tsx_ctrl_is_supported(), add room for two more feature
+ bits in word 11 which are coming up in the next merge window. ]
+
+Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: <stable@kernel.org>
+Link: https://lore.kernel.org/r/de619764e1d98afbb7a5fa58424f1278ede37b45.1668539735.git.pawan.kumar.gupta@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h | 3 ++
+ arch/x86/kernel/cpu/tsx.c | 38 ++++++++++++++++---------------------
+ 2 files changed, 20 insertions(+), 21 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -305,6 +305,9 @@
+ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
+ #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
+
++
++#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
++
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
+ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
+--- a/arch/x86/kernel/cpu/tsx.c
++++ b/arch/x86/kernel/cpu/tsx.c
+@@ -58,24 +58,6 @@ static void tsx_enable(void)
+ wrmsrl(MSR_IA32_TSX_CTRL, tsx);
+ }
+
+-static bool tsx_ctrl_is_supported(void)
+-{
+- u64 ia32_cap = x86_read_arch_cap_msr();
+-
+- /*
+- * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this
+- * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES.
+- *
+- * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a
+- * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES
+- * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get
+- * MSR_IA32_TSX_CTRL support even after a microcode update. Thus,
+- * tsx= cmdline requests will do nothing on CPUs without
+- * MSR_IA32_TSX_CTRL support.
+- */
+- return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
+-}
+-
+ static enum tsx_ctrl_states x86_get_tsx_auto_mode(void)
+ {
+ if (boot_cpu_has_bug(X86_BUG_TAA))
+@@ -135,7 +117,7 @@ static void tsx_clear_cpuid(void)
+ rdmsrl(MSR_TSX_FORCE_ABORT, msr);
+ msr |= MSR_TFA_TSX_CPUID_CLEAR;
+ wrmsrl(MSR_TSX_FORCE_ABORT, msr);
+- } else if (tsx_ctrl_is_supported()) {
++ } else if (cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL)) {
+ rdmsrl(MSR_IA32_TSX_CTRL, msr);
+ msr |= TSX_CTRL_CPUID_CLEAR;
+ wrmsrl(MSR_IA32_TSX_CTRL, msr);
+@@ -158,7 +140,8 @@ static void tsx_dev_mode_disable(void)
+ u64 mcu_opt_ctrl;
+
+ /* Check if RTM_ALLOW exists */
+- if (!boot_cpu_has_bug(X86_BUG_TAA) || !tsx_ctrl_is_supported() ||
++ if (!boot_cpu_has_bug(X86_BUG_TAA) ||
++ !cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL) ||
+ !cpu_feature_enabled(X86_FEATURE_SRBDS_CTRL))
+ return;
+
+@@ -191,7 +174,20 @@ void __init tsx_init(void)
+ return;
+ }
+
+- if (!tsx_ctrl_is_supported()) {
++ /*
++ * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this
++ * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES.
++ *
++ * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a
++ * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES
++ * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get
++ * MSR_IA32_TSX_CTRL support even after a microcode update. Thus,
++ * tsx= cmdline requests will do nothing on CPUs without
++ * MSR_IA32_TSX_CTRL support.
++ */
++ if (x86_read_arch_cap_msr() & ARCH_CAP_TSX_CTRL_MSR) {
++ setup_force_cpu_cap(X86_FEATURE_MSR_TSX_CTRL);
++ } else {
+ tsx_ctrl_state = TSX_CTRL_NOT_SUPPORTED;
+ return;
+ }