6.0-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Wed, 30 Nov 2022 12:32:33 +0000 (13:32 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Wed, 30 Nov 2022 12:32:33 +0000 (13:32 +0100)
added patches:
loongarch-clear-fpu-simd-thread-info-flags-for-kernel-thread.patch
loongarch-set-_page_dirty-only-if-_page_write-is-set-in-pmd-pte-_mkdirty.patch
mm-correctly-charge-compressed-memory-to-its-memcg.patch
mm-fix-unexpected-changes-to-failslab-fail_page_alloc-.attr.patch
x86-ioremap-fix-page-aligned-size-calculation-in-__ioremap_caller.patch
x86-pm-add-enumeration-check-before-spec-msrs-save-restore-setup.patch
x86-tsx-add-a-feature-bit-for-tsx-control-msr-support.patch

queue-6.0/loongarch-clear-fpu-simd-thread-info-flags-for-kernel-thread.patch [new file with mode: 0644]
queue-6.0/loongarch-set-_page_dirty-only-if-_page_write-is-set-in-pmd-pte-_mkdirty.patch [new file with mode: 0644]
queue-6.0/mm-correctly-charge-compressed-memory-to-its-memcg.patch [new file with mode: 0644]
queue-6.0/mm-fix-unexpected-changes-to-failslab-fail_page_alloc-.attr.patch [new file with mode: 0644]
queue-6.0/series
queue-6.0/x86-ioremap-fix-page-aligned-size-calculation-in-__ioremap_caller.patch [new file with mode: 0644]
queue-6.0/x86-pm-add-enumeration-check-before-spec-msrs-save-restore-setup.patch [new file with mode: 0644]
queue-6.0/x86-tsx-add-a-feature-bit-for-tsx-control-msr-support.patch [new file with mode: 0644]

diff --git a/queue-6.0/loongarch-clear-fpu-simd-thread-info-flags-for-kernel-thread.patch b/queue-6.0/loongarch-clear-fpu-simd-thread-info-flags-for-kernel-thread.patch
new file mode 100644 (file)
index 0000000..4fa8cf1
--- /dev/null
@@ -0,0 +1,97 @@
+From e428e9613531d1ef6bd0d91352899712b29134fb Mon Sep 17 00:00:00 2001
+From: Huacai Chen <chenhuacai@loongson.cn>
+Date: Mon, 21 Nov 2022 19:02:57 +0800
+Subject: LoongArch: Clear FPU/SIMD thread info flags for kernel thread
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+commit e428e9613531d1ef6bd0d91352899712b29134fb upstream.
+
+If a kernel thread is created by a user thread, it may carry FPU/SIMD
+thread info flags (TIF_USEDFPU, TIF_USEDSIMD, etc.). It will then be
+considered an FPU owner, and the kernel will try to save its FPU/SIMD
+context, causing errors such as:
+
+[   41.518931] do_fpu invoked from kernel context![#1]:
+[   41.523933] CPU: 1 PID: 395 Comm: iou-wrk-394 Not tainted 6.1.0-rc5+ #217
+[   41.530757] Hardware name: Loongson Loongson-3A5000-7A1000-1w-CRB/Loongson-LS3A5000-7A1000-1w-CRB, BIOS vUDK2018-LoongArch-V2.0.pre-beta8 08/18/2022
+[   41.544064] $ 0   : 0000000000000000 90000000011e9468 9000000106c7c000 9000000106c7fcf0
+[   41.552101] $ 4   : 9000000106305d40 9000000106689800 9000000106c7fd08 0000000003995818
+[   41.560138] $ 8   : 0000000000000001 90000000009a72e4 0000000000000020 fffffffffffffffc
+[   41.568174] $12   : 0000000000000000 0000000000000000 0000000000000020 00000009aab7e130
+[   41.576211] $16   : 00000000000001ff 0000000000000407 0000000000000001 0000000000000000
+[   41.584247] $20   : 0000000000000000 0000000000000001 9000000106c7fd70 90000001002f0400
+[   41.592284] $24   : 0000000000000000 900000000178f740 90000000011e9834 90000001063057c0
+[   41.600320] $28   : 0000000000000000 0000000000000001 9000000006826b40 9000000106305140
+[   41.608356] era   : 9000000000228848 _save_fp+0x0/0xd8
+[   41.613542] ra    : 90000000011e9468 __schedule+0x568/0x8d0
+[   41.619160] CSR crmd: 000000b0
+[   41.619163] CSR prmd: 00000000
+[   41.622359] CSR euen: 00000000
+[   41.625558] CSR ecfg: 00071c1c
+[   41.628756] CSR estat: 000f0000
+[   41.635239] ExcCode : f (SubCode 0)
+[   41.638783] PrId  : 0014c010 (Loongson-64bit)
+[   41.643191] Modules linked in: acpi_ipmi vfat fat ipmi_si ipmi_devintf cfg80211 ipmi_msghandler rfkill fuse efivarfs
+[   41.653734] Process iou-wrk-394 (pid: 395, threadinfo=0000000004ebe913, task=00000000636fa1be)
+[   41.662375] Stack : 00000000ffff0875 9000000006800ec0 9000000006800ec0 90000000002d57e0
+[   41.670412]         0000000000000001 0000000000000000 9000000106535880 0000000000000001
+[   41.678450]         9000000105291800 0000000000000000 9000000105291838 900000000178e000
+[   41.686487]         9000000106c7fd90 9000000106305140 0000000000000001 90000000011e9834
+[   41.694523]         00000000ffff0875 90000000011f034c 9000000105291838 9000000105291830
+[   41.702561]         0000000000000000 9000000006801440 00000000ffff0875 90000000002d48c0
+[   41.710597]         9000000128800001 9000000106305140 9000000105291838 9000000105291838
+[   41.718634]         9000000105291830 9000000107811740 9000000105291848 90000000009bf1e0
+[   41.726672]         9000000105291830 9000000107811748 2d6b72772d756f69 0000000000343933
+[   41.734708]         0000000000000000 0000000000000000 0000000000000000 0000000000000000
+[   41.742745]         ...
+[   41.745252] Call Trace:
+[   42.197868] [<9000000000228848>] _save_fp+0x0/0xd8
+[   42.205214] [<90000000011ed468>] __schedule+0x568/0x8d0
+[   42.210485] [<90000000011ed834>] schedule+0x64/0xd4
+[   42.215411] [<90000000011f434c>] schedule_timeout+0x88/0x188
+[   42.221115] [<90000000009c36d0>] io_wqe_worker+0x184/0x350
+[   42.226645] [<9000000000221cf0>] ret_from_kernel_thread+0xc/0x9c
+
+This can be easily triggered by ltp testcase syscalls/io_uring02 and it
+can also be easily fixed by clearing the FPU/SIMD thread info flags for
+kernel threads in copy_thread().
+
+Cc: stable@vger.kernel.org
+Reported-by: Qi Hu <huqi@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kernel/process.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/arch/loongarch/kernel/process.c
++++ b/arch/loongarch/kernel/process.c
+@@ -152,7 +152,7 @@ int copy_thread(struct task_struct *p, c
+               childregs->csr_crmd = p->thread.csr_crmd;
+               childregs->csr_prmd = p->thread.csr_prmd;
+               childregs->csr_ecfg = p->thread.csr_ecfg;
+-              return 0;
++              goto out;
+       }
+       /* user thread */
+@@ -171,14 +171,15 @@ int copy_thread(struct task_struct *p, c
+        */
+       childregs->csr_euen = 0;
++      if (clone_flags & CLONE_SETTLS)
++              childregs->regs[2] = tls;
++
++out:
+       clear_tsk_thread_flag(p, TIF_USEDFPU);
+       clear_tsk_thread_flag(p, TIF_USEDSIMD);
+       clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE);
+       clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE);
+-      if (clone_flags & CLONE_SETTLS)
+-              childregs->regs[2] = tls;
+-
+       return 0;
+ }
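A condensed sketch of the control flow in copy_thread() after both hunks above
are applied (simplified; the elided register set-up and the exact kernel-thread
test are assumptions, not the verbatim upstream function):

	if (unlikely(args->fn)) {
		/* kernel thread: set up kernel context (registers elided) */
		childregs->csr_crmd = p->thread.csr_crmd;
		childregs->csr_prmd = p->thread.csr_prmd;
		childregs->csr_ecfg = p->thread.csr_ecfg;
		goto out;			/* previously: return 0 */
	}

	/* user thread: copy user context (elided) */
	childregs->csr_euen = 0;

	if (clone_flags & CLONE_SETTLS)
		childregs->regs[2] = tls;

out:
	/* Now reached for kernel threads as well, so a kernel thread forked
	 * from a user thread no longer inherits these flags and is never
	 * mistaken for an FPU/SIMD owner. */
	clear_tsk_thread_flag(p, TIF_USEDFPU);
	clear_tsk_thread_flag(p, TIF_USEDSIMD);
	clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE);
	clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE);

	return 0;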
diff --git a/queue-6.0/loongarch-set-_page_dirty-only-if-_page_write-is-set-in-pmd-pte-_mkdirty.patch b/queue-6.0/loongarch-set-_page_dirty-only-if-_page_write-is-set-in-pmd-pte-_mkdirty.patch
new file mode 100644 (file)
index 0000000..236e02d
--- /dev/null
@@ -0,0 +1,55 @@
+From bf2f34a506e66e2979de6b17c337c5d4b25b4d2c Mon Sep 17 00:00:00 2001
+From: Huacai Chen <chenhuacai@loongson.cn>
+Date: Mon, 21 Nov 2022 19:02:57 +0800
+Subject: LoongArch: Set _PAGE_DIRTY only if _PAGE_WRITE is set in {pmd,pte}_mkdirty()
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+commit bf2f34a506e66e2979de6b17c337c5d4b25b4d2c upstream.
+
+Currently {pmd,pte}_mkdirty() set the _PAGE_DIRTY bit unconditionally, which
+causes random segmentation faults after commit 0ccf7f168e17bb7e ("mm/thp:
+carry over dirty bit when thp splits on pmd").
+
+The reason is: on fork(), the parent process uses pmd_wrprotect() to clear
+the huge page's _PAGE_WRITE and _PAGE_DIRTY bits (for COW); then, while
+splitting dirty huge pages, pte_mkdirty() sets _PAGE_DIRTY as well as
+_PAGE_MODIFIED; once _PAGE_DIRTY is set, no TLB modify exception is raised,
+so the COW mechanism fails, and in the end memory corruption occurs between
+the parent and child processes.
+
+So, _PAGE_DIRTY should be set only when _PAGE_WRITE is set in
+{pmd,pte}_mkdirty().
+
+Cc: stable@vger.kernel.org
+Cc: Peter Xu <peterx@redhat.com>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/include/asm/pgtable.h |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/loongarch/include/asm/pgtable.h
++++ b/arch/loongarch/include/asm/pgtable.h
+@@ -349,7 +349,9 @@ static inline pte_t pte_mkclean(pte_t pt
+ static inline pte_t pte_mkdirty(pte_t pte)
+ {
+-      pte_val(pte) |= (_PAGE_DIRTY | _PAGE_MODIFIED);
++      pte_val(pte) |= _PAGE_MODIFIED;
++      if (pte_val(pte) & _PAGE_WRITE)
++              pte_val(pte) |= _PAGE_DIRTY;
+       return pte;
+ }
+@@ -475,7 +477,9 @@ static inline pmd_t pmd_mkclean(pmd_t pm
+ static inline pmd_t pmd_mkdirty(pmd_t pmd)
+ {
+-      pmd_val(pmd) |= (_PAGE_DIRTY | _PAGE_MODIFIED);
++      pmd_val(pmd) |= _PAGE_MODIFIED;
++      if (pmd_val(pmd) & _PAGE_WRITE)
++              pmd_val(pmd) |= _PAGE_DIRTY;
+       return pmd;
+ }
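A standalone illustration of the bit logic the two hunks above introduce. The
bit positions here are hypothetical placeholders, not the real LoongArch
pgtable-bits.h values; the point is only that mkdirty on a write-protected
entry must no longer set the dirty bit, so the TLB modify exception that
drives COW still fires:

	#include <stdint.h>
	#include <stdio.h>

	#define _PAGE_WRITE    (1u << 8)   /* hypothetical bit positions */
	#define _PAGE_DIRTY    (1u << 9)
	#define _PAGE_MODIFIED (1u << 10)

	static uint64_t mkdirty(uint64_t pte)
	{
		pte |= _PAGE_MODIFIED;
		if (pte & _PAGE_WRITE)	/* the fix: only dirty writable entries */
			pte |= _PAGE_DIRTY;
		return pte;
	}

	int main(void)
	{
		uint64_t wrprotected = 0;		/* as left by pmd_wrprotect() in fork() */
		uint64_t writable    = _PAGE_WRITE;

		printf("write-protected entry dirty after mkdirty? %d\n",
		       !!(mkdirty(wrprotected) & _PAGE_DIRTY));	/* prints 0 */
		printf("writable entry dirty after mkdirty?        %d\n",
		       !!(mkdirty(writable) & _PAGE_DIRTY));	/* prints 1 */
		return 0;
	}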
diff --git a/queue-6.0/mm-correctly-charge-compressed-memory-to-its-memcg.patch b/queue-6.0/mm-correctly-charge-compressed-memory-to-its-memcg.patch
new file mode 100644 (file)
index 0000000..fb96ec1
--- /dev/null
@@ -0,0 +1,45 @@
+From cd08d80ecdac577bad2e8d6805c7a3859fdefb8d Mon Sep 17 00:00:00 2001
+From: Li Liguang <liliguang@baidu.com>
+Date: Mon, 14 Nov 2022 14:48:28 -0500
+Subject: mm: correctly charge compressed memory to its memcg
+
+From: Li Liguang <liliguang@baidu.com>
+
+commit cd08d80ecdac577bad2e8d6805c7a3859fdefb8d upstream.
+
+Kswapd reclaims memory when memory pressure is high; if zswap is enabled,
+the anonymous memory is compressed and stored in the zpool. The
+memcg_kmem_bypass() check in get_obj_cgroup_from_page() bypasses kernel
+threads and causes the compressed memory not to be charged to its memory
+cgroup.
+
+Remove the memcg_kmem_bypass() call and properly charge compressed memory
+to its corresponding memory cgroup.
+
+Link: https://lore.kernel.org/linux-mm/CALvZod4nnn8BHYqAM4xtcR0Ddo2-Wr8uKm9h_CHWUaXw7g_DCg@mail.gmail.com/
+Link: https://lkml.kernel.org/r/20221114194828.100822-1-hannes@cmpxchg.org
+Fixes: f4840ccfca25 ("zswap: memcg accounting")
+Signed-off-by: Li Liguang <liliguang@baidu.com>
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Shakeel Butt <shakeelb@google.com>
+Reviewed-by: Muchun Song <songmuchun@bytedance.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Roman Gushchin <roman.gushchin@linux.dev>
+Cc: <stable@vger.kernel.org>   [5.19+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memcontrol.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -2971,7 +2971,7 @@ struct obj_cgroup *get_obj_cgroup_from_p
+ {
+       struct obj_cgroup *objcg;
+-      if (!memcg_kmem_enabled() || memcg_kmem_bypass())
++      if (!memcg_kmem_enabled())
+               return NULL;
+       if (PageMemcgKmem(page)) {
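After this one-line change, the guard at the top of get_obj_cgroup_from_page()
reduces to the global kmem switch; a sketch of the surrounding lines as implied
by the hunk above (not the full upstream function):

	struct obj_cgroup *objcg;

	/* Kernel threads (e.g. kswapd storing compressed pages into zswap)
	 * are no longer bypassed; only the global kmem switch is checked. */
	if (!memcg_kmem_enabled())
		return NULL;
	/* ... continue with the PageMemcgKmem()/memcg lookup as before ... */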
diff --git a/queue-6.0/mm-fix-unexpected-changes-to-failslab-fail_page_alloc-.attr.patch b/queue-6.0/mm-fix-unexpected-changes-to-failslab-fail_page_alloc-.attr.patch
new file mode 100644 (file)
index 0000000..56880e1
--- /dev/null
@@ -0,0 +1,165 @@
+From ea4452de2ae987342fadbdd2c044034e6480daad Mon Sep 17 00:00:00 2001
+From: Qi Zheng <zhengqi.arch@bytedance.com>
+Date: Fri, 18 Nov 2022 18:00:11 +0800
+Subject: mm: fix unexpected changes to {failslab|fail_page_alloc}.attr
+
+From: Qi Zheng <zhengqi.arch@bytedance.com>
+
+commit ea4452de2ae987342fadbdd2c044034e6480daad upstream.
+
+When we specify __GFP_NOWARN, we only expect that no warnings will be
+issued for the current caller.  But in __should_failslab() and
+__should_fail_alloc_page(), the local GFP flags alter the global
+{failslab|fail_page_alloc}.attr, which is persistent and shared by all
+tasks.  This is not what we expected; let's fix it.
+
+[akpm@linux-foundation.org: unexport should_fail_ex()]
+Link: https://lkml.kernel.org/r/20221118100011.2634-1-zhengqi.arch@bytedance.com
+Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
+Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Reviewed-by: Akinobu Mita <akinobu.mita@gmail.com>
+Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
+Cc: Akinobu Mita <akinobu.mita@gmail.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/fault-inject.h |    7 +++++--
+ lib/fault-inject.c           |   13 ++++++++-----
+ mm/failslab.c                |   12 ++++++++++--
+ mm/page_alloc.c              |    7 +++++--
+ 4 files changed, 28 insertions(+), 11 deletions(-)
+
+--- a/include/linux/fault-inject.h
++++ b/include/linux/fault-inject.h
+@@ -20,7 +20,6 @@ struct fault_attr {
+       atomic_t space;
+       unsigned long verbose;
+       bool task_filter;
+-      bool no_warn;
+       unsigned long stacktrace_depth;
+       unsigned long require_start;
+       unsigned long require_end;
+@@ -32,6 +31,10 @@ struct fault_attr {
+       struct dentry *dname;
+ };
++enum fault_flags {
++      FAULT_NOWARN =  1 << 0,
++};
++
+ #define FAULT_ATTR_INITIALIZER {                                      \
+               .interval = 1,                                          \
+               .times = ATOMIC_INIT(1),                                \
+@@ -40,11 +43,11 @@ struct fault_attr {
+               .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED,       \
+               .verbose = 2,                                           \
+               .dname = NULL,                                          \
+-              .no_warn = false,                                       \
+       }
+ #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
+ int setup_fault_attr(struct fault_attr *attr, char *str);
++bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags);
+ bool should_fail(struct fault_attr *attr, ssize_t size);
+ #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+--- a/lib/fault-inject.c
++++ b/lib/fault-inject.c
+@@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
+ static void fail_dump(struct fault_attr *attr)
+ {
+-      if (attr->no_warn)
+-              return;
+-
+       if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
+               printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
+                      "name %pd, interval %lu, probability %lu, "
+@@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struc
+  * http://www.nongnu.org/failmalloc/
+  */
+-bool should_fail(struct fault_attr *attr, ssize_t size)
++bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
+ {
+       if (in_task()) {
+               unsigned int fail_nth = READ_ONCE(current->fail_nth);
+@@ -146,13 +143,19 @@ bool should_fail(struct fault_attr *attr
+               return false;
+ fail:
+-      fail_dump(attr);
++      if (!(flags & FAULT_NOWARN))
++              fail_dump(attr);
+       if (atomic_read(&attr->times) != -1)
+               atomic_dec_not_zero(&attr->times);
+       return true;
+ }
++
++bool should_fail(struct fault_attr *attr, ssize_t size)
++{
++      return should_fail_ex(attr, size, 0);
++}
+ EXPORT_SYMBOL_GPL(should_fail);
+ #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+--- a/mm/failslab.c
++++ b/mm/failslab.c
+@@ -16,6 +16,8 @@ static struct {
+ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
+ {
++      int flags = 0;
++
+       /* No fault-injection for bootstrap cache */
+       if (unlikely(s == kmem_cache))
+               return false;
+@@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache
+       if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
+               return false;
++      /*
++       * In some cases, it expects to specify __GFP_NOWARN
++       * to avoid printing any information(not just a warning),
++       * thus avoiding deadlocks. See commit 6b9dbedbe349 for
++       * details.
++       */
+       if (gfpflags & __GFP_NOWARN)
+-              failslab.attr.no_warn = true;
++              flags |= FAULT_NOWARN;
+-      return should_fail(&failslab.attr, s->object_size);
++      return should_fail_ex(&failslab.attr, s->object_size, flags);
+ }
+ static int __init setup_failslab(char *str)
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3883,6 +3883,8 @@ __setup("fail_page_alloc=", setup_fail_p
+ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
+ {
++      int flags = 0;
++
+       if (order < fail_page_alloc.min_order)
+               return false;
+       if (gfp_mask & __GFP_NOFAIL)
+@@ -3893,10 +3895,11 @@ static bool __should_fail_alloc_page(gfp
+                       (gfp_mask & __GFP_DIRECT_RECLAIM))
+               return false;
++      /* See comment in __should_failslab() */
+       if (gfp_mask & __GFP_NOWARN)
+-              fail_page_alloc.attr.no_warn = true;
++              flags |= FAULT_NOWARN;
+-      return should_fail(&fail_page_alloc.attr, 1 << order);
++      return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags);
+ }
+ #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
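Beyond the two call sites converted in this patch, the new should_fail_ex()
interface lets any fault-injection site suppress the "forcing a failure"
message for a single attempt without mutating the shared fault_attr. A short
usage sketch; my_fault_attr and want_quiet are hypothetical names, not part of
the patch:

	/* Fail this allocation attempt, quietly when the caller asked for it. */
	if (should_fail_ex(&my_fault_attr, size, want_quiet ? FAULT_NOWARN : 0))
		return -ENOMEM;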
diff --git a/queue-6.0/series b/queue-6.0/series
index 28a92ec2bac365adba20efc171a48e8185cbfda0..282fb134ceb03bee1d112a9e6f80d57408bfc531 100644 (file)
--- a/queue-6.0/series
@@ -210,3 +210,10 @@ kvm-x86-nsvm-harden-svm_free_nested-against-freeing-vmcb02-while-still-in-use.pa
 kvm-x86-add-kvm_leave_nested.patch
 kvm-x86-remove-exit_int_info-warning-in-svm_handle_exit.patch
 kvm-update-gfn_to_pfn_cache-khva-when-it-moves-within-the-same-page.patch
+x86-tsx-add-a-feature-bit-for-tsx-control-msr-support.patch
+x86-pm-add-enumeration-check-before-spec-msrs-save-restore-setup.patch
+x86-ioremap-fix-page-aligned-size-calculation-in-__ioremap_caller.patch
+mm-fix-unexpected-changes-to-failslab-fail_page_alloc-.attr.patch
+mm-correctly-charge-compressed-memory-to-its-memcg.patch
+loongarch-clear-fpu-simd-thread-info-flags-for-kernel-thread.patch
+loongarch-set-_page_dirty-only-if-_page_write-is-set-in-pmd-pte-_mkdirty.patch
diff --git a/queue-6.0/x86-ioremap-fix-page-aligned-size-calculation-in-__ioremap_caller.patch b/queue-6.0/x86-ioremap-fix-page-aligned-size-calculation-in-__ioremap_caller.patch
new file mode 100644 (file)
index 0000000..d1f64ba
--- /dev/null
@@ -0,0 +1,49 @@
+From 4dbd6a3e90e03130973688fd79e19425f720d999 Mon Sep 17 00:00:00 2001
+From: Michael Kelley <mikelley@microsoft.com>
+Date: Wed, 16 Nov 2022 10:41:24 -0800
+Subject: x86/ioremap: Fix page aligned size calculation in __ioremap_caller()
+
+From: Michael Kelley <mikelley@microsoft.com>
+
+commit 4dbd6a3e90e03130973688fd79e19425f720d999 upstream.
+
+Current code re-calculates the size after aligning the starting and
+ending physical addresses on a page boundary. But the re-calculation
+also embeds the masking of high order bits that exceed the size of
+the physical address space (via PHYSICAL_PAGE_MASK). If the masking
+removes any high order bits, the size calculation results in a huge
+value that is likely to immediately fail.
+
+Fix this by re-calculating the page-aligned size first. Then mask any
+high order bits using PHYSICAL_PAGE_MASK.
+
+Fixes: ffa71f33a820 ("x86, ioremap: Fix incorrect physical address handling in PAE mode")
+Signed-off-by: Michael Kelley <mikelley@microsoft.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: <stable@kernel.org>
+Link: https://lore.kernel.org/r/1668624097-14884-2-git-send-email-mikelley@microsoft.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/mm/ioremap.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/mm/ioremap.c
++++ b/arch/x86/mm/ioremap.c
+@@ -216,9 +216,15 @@ __ioremap_caller(resource_size_t phys_ad
+        * Mappings have to be page-aligned
+        */
+       offset = phys_addr & ~PAGE_MASK;
+-      phys_addr &= PHYSICAL_PAGE_MASK;
++      phys_addr &= PAGE_MASK;
+       size = PAGE_ALIGN(last_addr+1) - phys_addr;
++      /*
++       * Mask out any bits not part of the actual physical
++       * address, like memory encryption bits.
++       */
++      phys_addr &= PHYSICAL_PAGE_MASK;
++
+       retval = memtype_reserve(phys_addr, (u64)phys_addr + size,
+                                               pcm, &new_pcm);
+       if (retval) {
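A standalone illustration of why the re-ordering above matters, using
hypothetical constants (4 KiB pages, a 40-bit physical address space, and a
request carrying an "encryption"-style bit 51 above that space); this is a
sketch of the arithmetic only, not kernel code:

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SHIFT          12
	#define PAGE_SIZE           (1ULL << PAGE_SHIFT)
	#define PAGE_MASK           (~(PAGE_SIZE - 1))
	#define PAGE_ALIGN(x)       (((x) + PAGE_SIZE - 1) & PAGE_MASK)
	#define PHYSICAL_PAGE_MASK  (PAGE_MASK & ((1ULL << 40) - 1))  /* hypothetical 40-bit space */

	int main(void)
	{
		uint64_t phys_addr = (1ULL << 51) | 0x1234;  /* high bit + unaligned offset */
		uint64_t req_size  = 0x2000;
		uint64_t last_addr = phys_addr + req_size - 1;

		/* Old order: strip high bits first, then subtract -> huge bogus size. */
		uint64_t old = PAGE_ALIGN(last_addr + 1) - (phys_addr & PHYSICAL_PAGE_MASK);

		/* New order: page-align both ends first, mask high bits afterwards. */
		uint64_t new = PAGE_ALIGN(last_addr + 1) - (phys_addr & PAGE_MASK);

		printf("old size = %#llx\nnew size = %#llx\n",
		       (unsigned long long)old, (unsigned long long)new);
		return 0;
	}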
diff --git a/queue-6.0/x86-pm-add-enumeration-check-before-spec-msrs-save-restore-setup.patch b/queue-6.0/x86-pm-add-enumeration-check-before-spec-msrs-save-restore-setup.patch
new file mode 100644 (file)
index 0000000..0bf10c0
--- /dev/null
@@ -0,0 +1,98 @@
+From 50bcceb7724e471d9b591803889df45dcbb584bc Mon Sep 17 00:00:00 2001
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Tue, 15 Nov 2022 11:17:06 -0800
+Subject: x86/pm: Add enumeration check before spec MSRs save/restore setup
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 50bcceb7724e471d9b591803889df45dcbb584bc upstream.
+
+pm_save_spec_msr() keeps a list of all the MSRs which _might_ need
+to be saved and restored at hibernate and resume. However, it has
+zero awareness of CPU support for these MSRs. It mostly works by
+unconditionally attempting to manipulate these MSRs and relying on
+rdmsrl_safe() being able to handle a #GP on CPUs where the support is
+unavailable.
+
+However, it's possible for reads (RDMSR) to be supported for a given MSR
+while writes (WRMSR) are not. In this case, msr_build_context() sees
+a successful read (RDMSR) and marks the MSR as valid. Then, later, a
+write (WRMSR) fails, producing a nasty (but harmless) error message.
+This causes restore_processor_state() to try and restore it, but writing
+this MSR is not allowed on the Intel Atom N2600 leading to:
+
+  unchecked MSR access error: WRMSR to 0x122 (tried to write 0x0000000000000002) \
+     at rIP: 0xffffffff8b07a574 (native_write_msr+0x4/0x20)
+  Call Trace:
+   <TASK>
+   restore_processor_state
+   x86_acpi_suspend_lowlevel
+   acpi_suspend_enter
+   suspend_devices_and_enter
+   pm_suspend.cold
+   state_store
+   kernfs_fop_write_iter
+   vfs_write
+   ksys_write
+   do_syscall_64
+   ? do_syscall_64
+   ? up_read
+   ? lock_is_held_type
+   ? asm_exc_page_fault
+   ? lockdep_hardirqs_on
+   entry_SYSCALL_64_after_hwframe
+
+To fix this, add the corresponding X86_FEATURE bit for each MSR.  Avoid
+trying to manipulate the MSR when the feature bit is clear. This
+required adding a X86_FEATURE bit for MSRs that do not have one already,
+but it's a small price to pay.
+
+  [ bp: Move struct msr_enumeration inside the only function that uses it. ]
+
+Fixes: 73924ec4d560 ("x86/pm: Save the MSR validity status at context setup")
+Reported-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
+Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: <stable@kernel.org>
+Link: https://lore.kernel.org/r/c24db75d69df6e66c0465e13676ad3f2837a2ed8.1668539735.git.pawan.kumar.gupta@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/power/cpu.c |   23 +++++++++++++++--------
+ 1 file changed, 15 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/power/cpu.c
++++ b/arch/x86/power/cpu.c
+@@ -513,16 +513,23 @@ static int pm_cpu_check(const struct x86
+ static void pm_save_spec_msr(void)
+ {
+-      u32 spec_msr_id[] = {
+-              MSR_IA32_SPEC_CTRL,
+-              MSR_IA32_TSX_CTRL,
+-              MSR_TSX_FORCE_ABORT,
+-              MSR_IA32_MCU_OPT_CTRL,
+-              MSR_AMD64_LS_CFG,
+-              MSR_AMD64_DE_CFG,
++      struct msr_enumeration {
++              u32 msr_no;
++              u32 feature;
++      } msr_enum[] = {
++              { MSR_IA32_SPEC_CTRL,    X86_FEATURE_MSR_SPEC_CTRL },
++              { MSR_IA32_TSX_CTRL,     X86_FEATURE_MSR_TSX_CTRL },
++              { MSR_TSX_FORCE_ABORT,   X86_FEATURE_TSX_FORCE_ABORT },
++              { MSR_IA32_MCU_OPT_CTRL, X86_FEATURE_SRBDS_CTRL },
++              { MSR_AMD64_LS_CFG,      X86_FEATURE_LS_CFG_SSBD },
++              { MSR_AMD64_DE_CFG,      X86_FEATURE_LFENCE_RDTSC },
+       };
++      int i;
+-      msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id));
++      for (i = 0; i < ARRAY_SIZE(msr_enum); i++) {
++              if (boot_cpu_has(msr_enum[i].feature))
++                      msr_build_context(&msr_enum[i].msr_no, 1);
++      }
+ }
+ static int pm_check_save_msr(void)
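A side effect of the new table layout is that saving an additional speculation
MSR now also requires naming the X86_FEATURE_* bit that enumerates it; a
hypothetical extra entry (illustration only, not part of the patch) would look
like:

	{ MSR_HYPOTHETICAL_CTRL, X86_FEATURE_HYPOTHETICAL_CTRL },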
diff --git a/queue-6.0/x86-tsx-add-a-feature-bit-for-tsx-control-msr-support.patch b/queue-6.0/x86-tsx-add-a-feature-bit-for-tsx-control-msr-support.patch
new file mode 100644 (file)
index 0000000..702193d
--- /dev/null
@@ -0,0 +1,115 @@
+From aaa65d17eec372c6a9756833f3964ba05b05ea14 Mon Sep 17 00:00:00 2001
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Tue, 15 Nov 2022 11:17:05 -0800
+Subject: x86/tsx: Add a feature bit for TSX control MSR support
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit aaa65d17eec372c6a9756833f3964ba05b05ea14 upstream.
+
+Support for the TSX control MSR is enumerated in MSR_IA32_ARCH_CAPABILITIES.
+This is different from how other CPU features are enumerated i.e. via
+CPUID. Currently, a call to tsx_ctrl_is_supported() is required for
+enumerating the feature. In the absence of a feature bit for TSX control,
+any code that relies on checking feature bits directly will not work.
+
+In preparation for adding a feature bit check in MSR save/restore
+during suspend/resume, set a new feature bit X86_FEATURE_TSX_CTRL when
+MSR_IA32_TSX_CTRL is present. Also make tsx_ctrl_is_supported() use the
+new feature bit to avoid any overhead of reading the MSR.
+
+  [ bp: Remove tsx_ctrl_is_supported(), add room for two more feature
+    bits in word 11 which are coming up in the next merge window. ]
+
+Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: <stable@kernel.org>
+Link: https://lore.kernel.org/r/de619764e1d98afbb7a5fa58424f1278ede37b45.1668539735.git.pawan.kumar.gupta@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h |    3 ++
+ arch/x86/kernel/cpu/tsx.c          |   38 ++++++++++++++++---------------------
+ 2 files changed, 20 insertions(+), 21 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -305,6 +305,9 @@
+ #define X86_FEATURE_USE_IBPB_FW               (11*32+16) /* "" Use IBPB during runtime firmware calls */
+ #define X86_FEATURE_RSB_VMEXIT_LITE   (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
++
++#define X86_FEATURE_MSR_TSX_CTRL      (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
++
+ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+ #define X86_FEATURE_AVX_VNNI          (12*32+ 4) /* AVX VNNI instructions */
+ #define X86_FEATURE_AVX512_BF16               (12*32+ 5) /* AVX512 BFLOAT16 instructions */
+--- a/arch/x86/kernel/cpu/tsx.c
++++ b/arch/x86/kernel/cpu/tsx.c
+@@ -58,24 +58,6 @@ static void tsx_enable(void)
+       wrmsrl(MSR_IA32_TSX_CTRL, tsx);
+ }
+-static bool tsx_ctrl_is_supported(void)
+-{
+-      u64 ia32_cap = x86_read_arch_cap_msr();
+-
+-      /*
+-       * TSX is controlled via MSR_IA32_TSX_CTRL.  However, support for this
+-       * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES.
+-       *
+-       * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a
+-       * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES
+-       * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get
+-       * MSR_IA32_TSX_CTRL support even after a microcode update. Thus,
+-       * tsx= cmdline requests will do nothing on CPUs without
+-       * MSR_IA32_TSX_CTRL support.
+-       */
+-      return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
+-}
+-
+ static enum tsx_ctrl_states x86_get_tsx_auto_mode(void)
+ {
+       if (boot_cpu_has_bug(X86_BUG_TAA))
+@@ -135,7 +117,7 @@ static void tsx_clear_cpuid(void)
+               rdmsrl(MSR_TSX_FORCE_ABORT, msr);
+               msr |= MSR_TFA_TSX_CPUID_CLEAR;
+               wrmsrl(MSR_TSX_FORCE_ABORT, msr);
+-      } else if (tsx_ctrl_is_supported()) {
++      } else if (cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL)) {
+               rdmsrl(MSR_IA32_TSX_CTRL, msr);
+               msr |= TSX_CTRL_CPUID_CLEAR;
+               wrmsrl(MSR_IA32_TSX_CTRL, msr);
+@@ -158,7 +140,8 @@ static void tsx_dev_mode_disable(void)
+       u64 mcu_opt_ctrl;
+       /* Check if RTM_ALLOW exists */
+-      if (!boot_cpu_has_bug(X86_BUG_TAA) || !tsx_ctrl_is_supported() ||
++      if (!boot_cpu_has_bug(X86_BUG_TAA) ||
++          !cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL) ||
+           !cpu_feature_enabled(X86_FEATURE_SRBDS_CTRL))
+               return;
+@@ -191,7 +174,20 @@ void __init tsx_init(void)
+               return;
+       }
+-      if (!tsx_ctrl_is_supported()) {
++      /*
++       * TSX is controlled via MSR_IA32_TSX_CTRL.  However, support for this
++       * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES.
++       *
++       * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a
++       * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES
++       * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get
++       * MSR_IA32_TSX_CTRL support even after a microcode update. Thus,
++       * tsx= cmdline requests will do nothing on CPUs without
++       * MSR_IA32_TSX_CTRL support.
++       */
++      if (x86_read_arch_cap_msr() & ARCH_CAP_TSX_CTRL_MSR) {
++              setup_force_cpu_cap(X86_FEATURE_MSR_TSX_CTRL);
++      } else {
+               tsx_ctrl_state = TSX_CTRL_NOT_SUPPORTED;
+               return;
+       }
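The point of the new synthetic bit is that callers can gate access to
MSR_IA32_TSX_CTRL on a plain feature test instead of re-reading
MSR_IA32_ARCH_CAPABILITIES at every site. A sketch of the resulting pattern,
mirroring the tsx_clear_cpuid() hunk above (msr is a local u64):

	if (cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL)) {
		rdmsrl(MSR_IA32_TSX_CTRL, msr);
		msr |= TSX_CTRL_CPUID_CLEAR;	/* hide TSX from CPUID */
		wrmsrl(MSR_IA32_TSX_CTRL, msr);
	}

This is also the feature bit the x86/pm patch in this series pairs with
MSR_IA32_TSX_CTRL in its save/restore table.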