From: Greg Kroah-Hartman Date: Tue, 10 Dec 2024 09:47:06 +0000 (+0100) Subject: 6.12-stable patches X-Git-Tag: v6.6.65~18 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c383109880170110d3e9242c55ab13e2e62c6fa7;p=thirdparty%2Fkernel%2Fstable-queue.git 6.12-stable patches added patches: kasan-make-report_lock-a-raw-spinlock.patch lib-stackinit-hide-never-taken-branch-from-compiler.patch mm-damon-fix-order-of-arguments-in-damos_before_apply-tracepoint.patch mm-fix-vrealloc-s-kasan-poisoning-logic.patch mm-gup-handle-null-pages-in-unpin_user_pages.patch mm-memcg-declare-do_memsw_account-inline.patch mm-mempolicy-fix-migrate_to_node-assuming-there-is-at-least-one-vma-in-a-mm.patch mm-open-code-page_folio-in-dump_page.patch mm-open-code-pagetail-in-folio_flags-and-const_folio_flags.patch mm-respect-mmap-hint-address-when-aligning-for-thp.patch ocfs2-update-seq_file-index-in-ocfs2_dlm_seq_next.patch sched-numa-fix-memory-leak-due-to-the-overwritten-vma-numab_state.patch stackdepot-fix-stack_depot_save_flags-in-nmi-context.patch x86-cpu-topology-remove-limit-of-cpus-due-to-disabled-io-apic.patch x86-mm-add-_page_noptishadow-bit-to-avoid-updating-userspace-page-tables.patch --- diff --git a/queue-6.12/kasan-make-report_lock-a-raw-spinlock.patch b/queue-6.12/kasan-make-report_lock-a-raw-spinlock.patch new file mode 100644 index 00000000000..3af3b01072a --- /dev/null +++ b/queue-6.12/kasan-make-report_lock-a-raw-spinlock.patch @@ -0,0 +1,66 @@ +From e30a0361b8515d424c73c67de1a43e45a13b8ba2 Mon Sep 17 00:00:00 2001 +From: Jared Kangas +Date: Tue, 19 Nov 2024 13:02:34 -0800 +Subject: kasan: make report_lock a raw spinlock + +From: Jared Kangas + +commit e30a0361b8515d424c73c67de1a43e45a13b8ba2 upstream. + +If PREEMPT_RT is enabled, report_lock is a sleeping spinlock and must not +be locked when IRQs are disabled. However, KASAN reports may be triggered +in such contexts. For example: + + char *s = kzalloc(1, GFP_KERNEL); + kfree(s); + local_irq_disable(); + char c = *s; /* KASAN report here leads to spin_lock() */ + local_irq_enable(); + +Make report_spinlock a raw spinlock to prevent rescheduling when +PREEMPT_RT is enabled. + +Link: https://lkml.kernel.org/r/20241119210234.1602529-1-jkangas@redhat.com +Fixes: 342a93247e08 ("locking/spinlock: Provide RT variant header: ") +Signed-off-by: Jared Kangas +Cc: Alexander Potapenko +Cc: Andrey Konovalov +Cc: Andrey Ryabinin +Cc: Dmitry Vyukov +Cc: Vincenzo Frascino +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/kasan/report.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/mm/kasan/report.c ++++ b/mm/kasan/report.c +@@ -200,7 +200,7 @@ static inline void fail_non_kasan_kunit_ + + #endif /* CONFIG_KUNIT */ + +-static DEFINE_SPINLOCK(report_lock); ++static DEFINE_RAW_SPINLOCK(report_lock); + + static void start_report(unsigned long *flags, bool sync) + { +@@ -211,7 +211,7 @@ static void start_report(unsigned long * + lockdep_off(); + /* Make sure we don't end up in loop. 
*/ + report_suppress_start(); +- spin_lock_irqsave(&report_lock, *flags); ++ raw_spin_lock_irqsave(&report_lock, *flags); + pr_err("==================================================================\n"); + } + +@@ -221,7 +221,7 @@ static void end_report(unsigned long *fl + trace_error_report_end(ERROR_DETECTOR_KASAN, + (unsigned long)addr); + pr_err("==================================================================\n"); +- spin_unlock_irqrestore(&report_lock, *flags); ++ raw_spin_unlock_irqrestore(&report_lock, *flags); + if (!test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) + check_panic_on_warn("KASAN"); + switch (kasan_arg_fault) { diff --git a/queue-6.12/lib-stackinit-hide-never-taken-branch-from-compiler.patch b/queue-6.12/lib-stackinit-hide-never-taken-branch-from-compiler.patch new file mode 100644 index 00000000000..e8d52b6252a --- /dev/null +++ b/queue-6.12/lib-stackinit-hide-never-taken-branch-from-compiler.patch @@ -0,0 +1,40 @@ +From 5c3793604f91123bf49bc792ce697a0bef4c173c Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Sun, 17 Nov 2024 03:38:13 -0800 +Subject: lib: stackinit: hide never-taken branch from compiler + +From: Kees Cook + +commit 5c3793604f91123bf49bc792ce697a0bef4c173c upstream. + +The never-taken branch leads to an invalid bounds condition, which is by +design. To avoid the unwanted warning from the compiler, hide the +variable from the optimizer. + +../lib/stackinit_kunit.c: In function 'do_nothing_u16_zero': +../lib/stackinit_kunit.c:51:49: error: array subscript 1 is outside array bounds of 'u16[0]' {aka 'short unsigned int[]'} [-Werror=array-bounds=] + 51 | #define DO_NOTHING_RETURN_SCALAR(ptr) *(ptr) + | ^~~~~~ +../lib/stackinit_kunit.c:219:24: note: in expansion of macro 'DO_NOTHING_RETURN_SCALAR' + 219 | return DO_NOTHING_RETURN_ ## which(ptr + 1); \ + | ^~~~~~~~~~~~~~~~~~ + +Link: https://lkml.kernel.org/r/20241117113813.work.735-kees@kernel.org +Signed-off-by: Kees Cook +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + lib/stackinit_kunit.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/lib/stackinit_kunit.c ++++ b/lib/stackinit_kunit.c +@@ -212,6 +212,7 @@ static noinline void test_ ## name (stru + static noinline DO_NOTHING_TYPE_ ## which(var_type) \ + do_nothing_ ## name(var_type *ptr) \ + { \ ++ OPTIMIZER_HIDE_VAR(ptr); \ + /* Will always be true, but compiler doesn't know. */ \ + if ((unsigned long)ptr > 0x2) \ + return DO_NOTHING_RETURN_ ## which(ptr); \ diff --git a/queue-6.12/mm-damon-fix-order-of-arguments-in-damos_before_apply-tracepoint.patch b/queue-6.12/mm-damon-fix-order-of-arguments-in-damos_before_apply-tracepoint.patch new file mode 100644 index 00000000000..d5a58b7b70f --- /dev/null +++ b/queue-6.12/mm-damon-fix-order-of-arguments-in-damos_before_apply-tracepoint.patch @@ -0,0 +1,43 @@ +From 6535b8669c1a74078098517174e53fc907ce9d56 Mon Sep 17 00:00:00 2001 +From: Akinobu Mita +Date: Fri, 15 Nov 2024 10:20:23 -0800 +Subject: mm/damon: fix order of arguments in damos_before_apply tracepoint + +From: Akinobu Mita + +commit 6535b8669c1a74078098517174e53fc907ce9d56 upstream. + +Since the order of the scheme_idx and target_idx arguments in TP_ARGS is +reversed, they are stored in the trace record in reverse. 
+ +Link: https://lkml.kernel.org/r/20241115182023.43118-1-sj@kernel.org +Link: https://patch.msgid.link/20241112154828.40307-1-akinobu.mita@gmail.com +Fixes: c603c630b509 ("mm/damon/core: add a tracepoint for damos apply target regions") +Signed-off-by: Akinobu Mita +Signed-off-by: SeongJae Park +Cc: Masami Hiramatsu +Cc: Mathieu Desnoyers +Cc: Steven Rostedt +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/trace/events/damon.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h +index 23200aabccac..da4bd9fd1162 100644 +--- a/include/trace/events/damon.h ++++ b/include/trace/events/damon.h +@@ -15,7 +15,7 @@ TRACE_EVENT_CONDITION(damos_before_apply, + unsigned int target_idx, struct damon_region *r, + unsigned int nr_regions, bool do_trace), + +- TP_ARGS(context_idx, target_idx, scheme_idx, r, nr_regions, do_trace), ++ TP_ARGS(context_idx, scheme_idx, target_idx, r, nr_regions, do_trace), + + TP_CONDITION(do_trace), + +-- +2.47.1 + diff --git a/queue-6.12/mm-fix-vrealloc-s-kasan-poisoning-logic.patch b/queue-6.12/mm-fix-vrealloc-s-kasan-poisoning-logic.patch new file mode 100644 index 00000000000..658c1477257 --- /dev/null +++ b/queue-6.12/mm-fix-vrealloc-s-kasan-poisoning-logic.patch @@ -0,0 +1,54 @@ +From d699440f58ce9bd71103cc7b692e3ab76a20bfcd Mon Sep 17 00:00:00 2001 +From: Andrii Nakryiko +Date: Mon, 25 Nov 2024 16:52:06 -0800 +Subject: mm: fix vrealloc()'s KASAN poisoning logic + +From: Andrii Nakryiko + +commit d699440f58ce9bd71103cc7b692e3ab76a20bfcd upstream. + +When vrealloc() reuses already allocated vmap_area, we need to re-annotate +poisoned and unpoisoned portions of underlying memory according to the new +size. + +This results in a KASAN splat recorded at [1]. A KASAN mis-reporting +issue where there is none. + +Note, hard-coding KASAN_VMALLOC_PROT_NORMAL might not be exactly correct, +but KASAN flag logic is pretty involved and spread out throughout +__vmalloc_node_range_noprof(), so I'm using the bare minimum flag here and +leaving the rest to mm people to refactor this logic and reuse it here. + +Link: https://lkml.kernel.org/r/20241126005206.3457974-1-andrii@kernel.org +Link: https://lore.kernel.org/bpf/67450f9b.050a0220.21d33d.0004.GAE@google.com/ [1] +Fixes: 3ddc2fefe6f3 ("mm: vmalloc: implement vrealloc()") +Signed-off-by: Andrii Nakryiko +Cc: Alexei Starovoitov +Cc: Christoph Hellwig +Cc: Michal Hocko +Cc: Uladzislau Rezki (Sony) +Cc: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/vmalloc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/mm/vmalloc.c b/mm/vmalloc.c +index 7ed39d104201..f009b21705c1 100644 +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -4093,7 +4093,8 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags) + /* Zero out spare memory. 
*/ + if (want_init_on_alloc(flags)) + memset((void *)p + size, 0, old_size - size); +- ++ kasan_poison_vmalloc(p + size, old_size - size); ++ kasan_unpoison_vmalloc(p, size, KASAN_VMALLOC_PROT_NORMAL); + return (void *)p; + } + +-- +2.47.1 + diff --git a/queue-6.12/mm-gup-handle-null-pages-in-unpin_user_pages.patch b/queue-6.12/mm-gup-handle-null-pages-in-unpin_user_pages.patch new file mode 100644 index 00000000000..5f0f8af9375 --- /dev/null +++ b/queue-6.12/mm-gup-handle-null-pages-in-unpin_user_pages.patch @@ -0,0 +1,98 @@ +From a1268be280d8e484ab3606d7476edd0f14bb9961 Mon Sep 17 00:00:00 2001 +From: John Hubbard +Date: Wed, 20 Nov 2024 19:49:33 -0800 +Subject: mm/gup: handle NULL pages in unpin_user_pages() + +From: John Hubbard + +commit a1268be280d8e484ab3606d7476edd0f14bb9961 upstream. + +The recent addition of "pofs" (pages or folios) handling to gup has a +flaw: it assumes that unpin_user_pages() handles NULL pages in the pages** +array. That's not the case, as I discovered when I ran on a new +configuration on my test machine. + +Fix this by skipping NULL pages in unpin_user_pages(), just like +unpin_folios() already does. + +Details: when booting on x86 with "numa=fake=2 movablecore=4G" on Linux +6.12, and running this: + + tools/testing/selftests/mm/gup_longterm + +...I get the following crash: + +BUG: kernel NULL pointer dereference, address: 0000000000000008 +RIP: 0010:sanity_check_pinned_pages+0x3a/0x2d0 +... +Call Trace: + + ? __die_body+0x66/0xb0 + ? page_fault_oops+0x30c/0x3b0 + ? do_user_addr_fault+0x6c3/0x720 + ? irqentry_enter+0x34/0x60 + ? exc_page_fault+0x68/0x100 + ? asm_exc_page_fault+0x22/0x30 + ? sanity_check_pinned_pages+0x3a/0x2d0 + unpin_user_pages+0x24/0xe0 + check_and_migrate_movable_pages_or_folios+0x455/0x4b0 + __gup_longterm_locked+0x3bf/0x820 + ? mmap_read_lock_killable+0x12/0x50 + ? 
__pfx_mmap_read_lock_killable+0x10/0x10 + pin_user_pages+0x66/0xa0 + gup_test_ioctl+0x358/0xb20 + __se_sys_ioctl+0x6b/0xc0 + do_syscall_64+0x7b/0x150 + entry_SYSCALL_64_after_hwframe+0x76/0x7e + +Link: https://lkml.kernel.org/r/20241121034933.77502-1-jhubbard@nvidia.com +Fixes: 94efde1d1539 ("mm/gup: avoid an unnecessary allocation call for FOLL_LONGTERM cases") +Signed-off-by: John Hubbard +Acked-by: David Hildenbrand +Cc: Oscar Salvador +Cc: Vivek Kasireddy +Cc: Dave Airlie +Cc: Gerd Hoffmann +Cc: Matthew Wilcox +Cc: Christoph Hellwig +Cc: Jason Gunthorpe +Cc: Peter Xu +Cc: Arnd Bergmann +Cc: Daniel Vetter +Cc: Dongwon Kim +Cc: Hugh Dickins +Cc: Junxiao Chang +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/gup.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/mm/gup.c ++++ b/mm/gup.c +@@ -52,7 +52,12 @@ static inline void sanity_check_pinned_p + */ + for (; npages; npages--, pages++) { + struct page *page = *pages; +- struct folio *folio = page_folio(page); ++ struct folio *folio; ++ ++ if (!page) ++ continue; ++ ++ folio = page_folio(page); + + if (is_zero_page(page) || + !folio_test_anon(folio)) +@@ -409,6 +414,10 @@ void unpin_user_pages(struct page **page + + sanity_check_pinned_pages(pages, npages); + for (i = 0; i < npages; i += nr) { ++ if (!pages[i]) { ++ nr = 1; ++ continue; ++ } + folio = gup_folio_next(pages, npages, i, &nr); + gup_put_folio(folio, nr, FOLL_PIN); + } diff --git a/queue-6.12/mm-memcg-declare-do_memsw_account-inline.patch b/queue-6.12/mm-memcg-declare-do_memsw_account-inline.patch new file mode 100644 index 00000000000..10abaa00e31 --- /dev/null +++ b/queue-6.12/mm-memcg-declare-do_memsw_account-inline.patch @@ -0,0 +1,50 @@ +From 89dd878282881306c38f7e354e7614fca98cb9a6 Mon Sep 17 00:00:00 2001 +From: John Sperbeck +Date: Thu, 28 Nov 2024 12:39:59 -0800 +Subject: mm: memcg: declare do_memsw_account inline + +From: John Sperbeck + +commit 89dd878282881306c38f7e354e7614fca98cb9a6 upstream. + +In commit 66d60c428b23 ("mm: memcg: move legacy memcg event code into +memcontrol-v1.c"), the static do_memsw_account() function was moved from a +.c file to a .h file. Unfortunately, the traditional inline keyword +wasn't added. 
If a file (e.g., a unit test) includes the .h file, but +doesn't refer to do_memsw_account(), it will get a warning like: + +mm/memcontrol-v1.h:41:13: warning: unused function 'do_memsw_account' [-Wunused-function] + 41 | static bool do_memsw_account(void) + | ^~~~~~~~~~~~~~~~ + +Link: https://lkml.kernel.org/r/20241128203959.726527-1-jsperbeck@google.com +Fixes: 66d60c428b23 ("mm: memcg: move legacy memcg event code into memcontrol-v1.c") +Signed-off-by: John Sperbeck +Acked-by: Roman Gushchin +Cc: Johannes Weiner +Cc: Michal Hocko +Cc: Muchun Song +Cc: Shakeel Butt +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/memcontrol-v1.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h +index 0e3b82951d91..144d71b65907 100644 +--- a/mm/memcontrol-v1.h ++++ b/mm/memcontrol-v1.h +@@ -38,7 +38,7 @@ void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n); + iter = mem_cgroup_iter(NULL, iter, NULL)) + + /* Whether legacy memory+swap accounting is active */ +-static bool do_memsw_account(void) ++static inline bool do_memsw_account(void) + { + return !cgroup_subsys_on_dfl(memory_cgrp_subsys); + } +-- +2.47.1 + diff --git a/queue-6.12/mm-mempolicy-fix-migrate_to_node-assuming-there-is-at-least-one-vma-in-a-mm.patch b/queue-6.12/mm-mempolicy-fix-migrate_to_node-assuming-there-is-at-least-one-vma-in-a-mm.patch new file mode 100644 index 00000000000..ece2b00441f --- /dev/null +++ b/queue-6.12/mm-mempolicy-fix-migrate_to_node-assuming-there-is-at-least-one-vma-in-a-mm.patch @@ -0,0 +1,74 @@ +From 091c1dd2d4df6edd1beebe0e5863d4034ade9572 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Wed, 20 Nov 2024 21:11:51 +0100 +Subject: mm/mempolicy: fix migrate_to_node() assuming there is at least one VMA in a MM + +From: David Hildenbrand + +commit 091c1dd2d4df6edd1beebe0e5863d4034ade9572 upstream. + +We currently assume that there is at least one VMA in a MM, which isn't +true. + +So we might end up having find_vma() return NULL, to then de-reference +NULL. So properly handle find_vma() returning NULL. + +This fixes the report: + +Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN PTI +KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] +CPU: 1 UID: 0 PID: 6021 Comm: syz-executor284 Not tainted 6.12.0-rc7-syzkaller-00187-gf868cd251776 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/30/2024 +RIP: 0010:migrate_to_node mm/mempolicy.c:1090 [inline] +RIP: 0010:do_migrate_pages+0x403/0x6f0 mm/mempolicy.c:1194 +Code: ... 
+RSP: 0018:ffffc9000375fd08 EFLAGS: 00010246 +RAX: 0000000000000000 RBX: ffffc9000375fd78 RCX: 0000000000000000 +RDX: ffff88807e171300 RSI: dffffc0000000000 RDI: ffff88803390c044 +RBP: ffff88807e171428 R08: 0000000000000014 R09: fffffbfff2039ef1 +R10: ffffffff901cf78f R11: 0000000000000000 R12: 0000000000000003 +R13: ffffc9000375fe90 R14: ffffc9000375fe98 R15: ffffc9000375fdf8 +FS: 00005555919e1380(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00005555919e1ca8 CR3: 000000007f12a000 CR4: 00000000003526f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + kernel_migrate_pages+0x5b2/0x750 mm/mempolicy.c:1709 + __do_sys_migrate_pages mm/mempolicy.c:1727 [inline] + __se_sys_migrate_pages mm/mempolicy.c:1723 [inline] + __x64_sys_migrate_pages+0x96/0x100 mm/mempolicy.c:1723 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + +[akpm@linux-foundation.org: add unlikely()] +Link: https://lkml.kernel.org/r/20241120201151.9518-1-david@redhat.com +Fixes: 39743889aaf7 ("[PATCH] Swap Migration V5: sys_migrate_pages interface") +Signed-off-by: David Hildenbrand +Reported-by: syzbot+3511625422f7aa637f0d@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/lkml/673d2696.050a0220.3c9d61.012f.GAE@google.com/T/ +Reviewed-by: Liam R. Howlett +Reviewed-by: Christoph Lameter +Cc: Liam R. Howlett +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/mempolicy.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -1080,6 +1080,10 @@ static long migrate_to_node(struct mm_st + + mmap_read_lock(mm); + vma = find_vma(mm, 0); ++ if (unlikely(!vma)) { ++ mmap_read_unlock(mm); ++ return 0; ++ } + + /* + * This does not migrate the range, but isolates all pages that diff --git a/queue-6.12/mm-open-code-page_folio-in-dump_page.patch b/queue-6.12/mm-open-code-page_folio-in-dump_page.patch new file mode 100644 index 00000000000..44a26d03e08 --- /dev/null +++ b/queue-6.12/mm-open-code-page_folio-in-dump_page.patch @@ -0,0 +1,60 @@ +From 6a7de1bf218d75f27f68d6a3f5ae1eb7332b941e Mon Sep 17 00:00:00 2001 +From: "Matthew Wilcox (Oracle)" +Date: Mon, 25 Nov 2024 20:17:19 +0000 +Subject: mm: open-code page_folio() in dump_page() + +From: Matthew Wilcox (Oracle) + +commit 6a7de1bf218d75f27f68d6a3f5ae1eb7332b941e upstream. + +page_folio() calls page_fixed_fake_head() which will misidentify this page +as being a fake head and load off the end of 'precise'. We may have a +pointer to a fake head, but that's OK because it contains the right +information for dump_page(). 
+ +gcc-15 is smart enough to catch this with -Warray-bounds: + +In function 'page_fixed_fake_head', + inlined from '_compound_head' at ../include/linux/page-flags.h:251:24, + inlined from '__dump_page' at ../mm/debug.c:123:11: +../include/asm-generic/rwonce.h:44:26: warning: array subscript 9 is outside ++array bounds of 'struct page[1]' [-Warray-bounds=] + +Link: https://lkml.kernel.org/r/20241125201721.2963278-2-willy@infradead.org +Fixes: fae7d834c43c ("mm: add __dump_folio()") +Signed-off-by: Matthew Wilcox (Oracle) +Reported-by: Kees Cook +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/debug.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/mm/debug.c ++++ b/mm/debug.c +@@ -124,19 +124,22 @@ static void __dump_page(const struct pag + { + struct folio *foliop, folio; + struct page precise; ++ unsigned long head; + unsigned long pfn = page_to_pfn(page); + unsigned long idx, nr_pages = 1; + int loops = 5; + + again: + memcpy(&precise, page, sizeof(*page)); +- foliop = page_folio(&precise); +- if (foliop == (struct folio *)&precise) { ++ head = precise.compound_head; ++ if ((head & 1) == 0) { ++ foliop = (struct folio *)&precise; + idx = 0; + if (!folio_test_large(foliop)) + goto dump; + foliop = (struct folio *)page; + } else { ++ foliop = (struct folio *)(head - 1); + idx = folio_page_idx(foliop, page); + } + diff --git a/queue-6.12/mm-open-code-pagetail-in-folio_flags-and-const_folio_flags.patch b/queue-6.12/mm-open-code-pagetail-in-folio_flags-and-const_folio_flags.patch new file mode 100644 index 00000000000..44ccbff65a3 --- /dev/null +++ b/queue-6.12/mm-open-code-pagetail-in-folio_flags-and-const_folio_flags.patch @@ -0,0 +1,47 @@ +From 4de22b2a6a7477d84d9a01eb6b62a9117309d722 Mon Sep 17 00:00:00 2001 +From: "Matthew Wilcox (Oracle)" +Date: Mon, 25 Nov 2024 20:17:18 +0000 +Subject: mm: open-code PageTail in folio_flags() and const_folio_flags() + +From: Matthew Wilcox (Oracle) + +commit 4de22b2a6a7477d84d9a01eb6b62a9117309d722 upstream. + +It is unsafe to call PageTail() in dump_page() as page_is_fake_head() will +almost certainly return true when called on a head page that is copied to +the stack. That will cause the VM_BUG_ON_PGFLAGS() in const_folio_flags() +to trigger when it shouldn't. Fortunately, we don't need to call +PageTail() here; it's fine to have a pointer to a virtual alias of the +page's flag word rather than the real page's flag word. 
+ +Link: https://lkml.kernel.org/r/20241125201721.2963278-1-willy@infradead.org +Fixes: fae7d834c43c ("mm: add __dump_folio()") +Signed-off-by: Matthew Wilcox (Oracle) +Cc: Kees Cook +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/page-flags.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/include/linux/page-flags.h ++++ b/include/linux/page-flags.h +@@ -306,7 +306,7 @@ static const unsigned long *const_folio_ + { + const struct page *page = &folio->page; + +- VM_BUG_ON_PGFLAGS(PageTail(page), page); ++ VM_BUG_ON_PGFLAGS(page->compound_head & 1, page); + VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); + return &page[n].flags; + } +@@ -315,7 +315,7 @@ static unsigned long *folio_flags(struct + { + struct page *page = &folio->page; + +- VM_BUG_ON_PGFLAGS(PageTail(page), page); ++ VM_BUG_ON_PGFLAGS(page->compound_head & 1, page); + VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); + return &page[n].flags; + } diff --git a/queue-6.12/mm-respect-mmap-hint-address-when-aligning-for-thp.patch b/queue-6.12/mm-respect-mmap-hint-address-when-aligning-for-thp.patch new file mode 100644 index 00000000000..5b80ec38339 --- /dev/null +++ b/queue-6.12/mm-respect-mmap-hint-address-when-aligning-for-thp.patch @@ -0,0 +1,111 @@ +From 249608ee47132cab3b1adacd9e463548f57bd316 Mon Sep 17 00:00:00 2001 +From: Kalesh Singh +Date: Mon, 18 Nov 2024 13:46:48 -0800 +Subject: mm: respect mmap hint address when aligning for THP + +From: Kalesh Singh + +commit 249608ee47132cab3b1adacd9e463548f57bd316 upstream. + +Commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP +boundaries") updated __get_unmapped_area() to align the start address for +the VMA to a PMD boundary if CONFIG_TRANSPARENT_HUGEPAGE=y. + +It does this by effectively looking up a region that is of size, +request_size + PMD_SIZE, and aligning up the start to a PMD boundary. + +Commit 4ef9ad19e176 ("mm: huge_memory: don't force huge page alignment on +32 bit") opted out of this for 32bit due to regressions in mmap base +randomization. + +Commit d4148aeab412 ("mm, mmap: limit THP alignment of anonymous mappings +to PMD-aligned sizes") restricted this to only mmap sizes that are +multiples of the PMD_SIZE due to reported regressions in some performance +benchmarks -- which seemed mostly due to the reduced spatial locality of +related mappings due to the forced PMD-alignment. + +Another unintended side effect has emerged: When a user specifies an mmap +hint address, the THP alignment logic modifies the behavior, potentially +ignoring the hint even if a sufficiently large gap exists at the requested +hint location. + +Example Scenario: + +Consider the following simplified virtual address (VA) space: + + ... + + 0x200000-0x400000 --- VMA A + 0x400000-0x600000 --- Hole + 0x600000-0x800000 --- VMA B + + ... + +A call to mmap() with hint=0x400000 and len=0x200000 behaves differently: + + - Before THP alignment: The requested region (size 0x200000) fits into + the gap at 0x400000, so the hint is respected. + + - After alignment: The logic searches for a region of size + 0x400000 (len + PMD_SIZE) starting at 0x400000. + This search fails due to the mapping at 0x600000 (VMA B), and the hint + is ignored, falling back to arch_get_unmapped_area[_topdown](). 
+ +In general the hint is effectively ignored, if there is any existing +mapping in the below range: + + [mmap_hint + mmap_size, mmap_hint + mmap_size + PMD_SIZE) + +This changes the semantics of mmap hint; from ""Respect the hint if a +sufficiently large gap exists at the requested location" to "Respect the +hint only if an additional PMD-sized gap exists beyond the requested +size". + +This has performance implications for allocators that allocate their heap +using mmap but try to keep it "as contiguous as possible" by using the end +of the exisiting heap as the address hint. With the new behavior it's +more likely to get a much less contiguous heap, adding extra fragmentation +and performance overhead. + +To restore the expected behavior; don't use +thp_get_unmapped_area_vmflags() when the user provided a hint address, for +anonymous mappings. + +Note: As Yang Shi pointed out: the issue still remains for filesystems +which are using thp_get_unmapped_area() for their get_unmapped_area() op. +It is unclear what worklaods will regress for if we ignore THP alignment +when the hint address is provided for such file backed mappings -- so this +fix will be handled separately. + +Link: https://lkml.kernel.org/r/20241118214650.3667577-1-kaleshsingh@google.com +Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") +Signed-off-by: Kalesh Singh +Reviewed-by: Rik van Riel +Reviewed-by: Vlastimil Babka +Reviewed-by: David Hildenbrand +Cc: Kefeng Wang +Cc: Vlastimil Babka +Cc: Yang Shi +Cc: Rik van Riel +Cc: Ryan Roberts +Cc: Suren Baghdasaryan +Cc: Minchan Kim +Cc: Hans Boehm +Cc: Lokesh Gidra +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/mmap.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -901,6 +901,7 @@ __get_unmapped_area(struct file *file, u + if (get_area) { + addr = get_area(file, addr, len, pgoff, flags); + } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) ++ && !addr /* no hint */ + && IS_ALIGNED(len, PMD_SIZE)) { + /* Ensures that larger anonymous mappings are THP aligned. */ + addr = thp_get_unmapped_area_vmflags(file, addr, len, diff --git a/queue-6.12/ocfs2-update-seq_file-index-in-ocfs2_dlm_seq_next.patch b/queue-6.12/ocfs2-update-seq_file-index-in-ocfs2_dlm_seq_next.patch new file mode 100644 index 00000000000..21abce5fbc6 --- /dev/null +++ b/queue-6.12/ocfs2-update-seq_file-index-in-ocfs2_dlm_seq_next.patch @@ -0,0 +1,43 @@ +From 914eec5e980171bc128e7e24f7a22aa1d803570e Mon Sep 17 00:00:00 2001 +From: Wengang Wang +Date: Tue, 19 Nov 2024 09:45:00 -0800 +Subject: ocfs2: update seq_file index in ocfs2_dlm_seq_next + +From: Wengang Wang + +commit 914eec5e980171bc128e7e24f7a22aa1d803570e upstream. + +The following INFO level message was seen: + +seq_file: buggy .next function ocfs2_dlm_seq_next [ocfs2] did not +update position index + +Fix: +Update *pos (so m->index) to make seq_read_iter happy though the index its +self makes no sense to ocfs2_dlm_seq_next. 
+ +Link: https://lkml.kernel.org/r/20241119174500.9198-1-wen.gang.wang@oracle.com +Signed-off-by: Wengang Wang +Reviewed-by: Joseph Qi +Cc: Mark Fasheh +Cc: Joel Becker +Cc: Junxiao Bi +Cc: Changwei Ge +Cc: Jun Piao +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/ocfs2/dlmglue.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/ocfs2/dlmglue.c ++++ b/fs/ocfs2/dlmglue.c +@@ -3110,6 +3110,7 @@ static void *ocfs2_dlm_seq_next(struct s + struct ocfs2_lock_res *iter = v; + struct ocfs2_lock_res *dummy = &priv->p_iter_res; + ++ (*pos)++; + spin_lock(&ocfs2_dlm_tracking_lock); + iter = ocfs2_dlm_next_res(iter, priv); + list_del_init(&dummy->l_debug_list); diff --git a/queue-6.12/sched-numa-fix-memory-leak-due-to-the-overwritten-vma-numab_state.patch b/queue-6.12/sched-numa-fix-memory-leak-due-to-the-overwritten-vma-numab_state.patch new file mode 100644 index 00000000000..72d08d99499 --- /dev/null +++ b/queue-6.12/sched-numa-fix-memory-leak-due-to-the-overwritten-vma-numab_state.patch @@ -0,0 +1,106 @@ +From 5f1b64e9a9b7ee9cfd32c6b2fab796e29bfed075 Mon Sep 17 00:00:00 2001 +From: Adrian Huang +Date: Wed, 13 Nov 2024 18:21:46 +0800 +Subject: sched/numa: fix memory leak due to the overwritten vma->numab_state + +From: Adrian Huang + +commit 5f1b64e9a9b7ee9cfd32c6b2fab796e29bfed075 upstream. + +[Problem Description] +When running the hackbench program of LTP, the following memory leak is +reported by kmemleak. + + # /opt/ltp/testcases/bin/hackbench 20 thread 1000 + Running with 20*40 (== 800) tasks. + + # dmesg | grep kmemleak + ... + kmemleak: 480 new suspected memory leaks (see /sys/kernel/debug/kmemleak) + kmemleak: 665 new suspected memory leaks (see /sys/kernel/debug/kmemleak) + + # cat /sys/kernel/debug/kmemleak + unreferenced object 0xffff888cd8ca2c40 (size 64): + comm "hackbench", pid 17142, jiffies 4299780315 + hex dump (first 32 bytes): + ac 74 49 00 01 00 00 00 4c 84 49 00 01 00 00 00 .tI.....L.I..... + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + backtrace (crc bff18fd4): + [] __kmalloc_cache_noprof+0x2f9/0x3f0 + [] task_numa_work+0x725/0xa00 + [] task_work_run+0x58/0x90 + [] syscall_exit_to_user_mode+0x1c8/0x1e0 + [] do_syscall_64+0x85/0x150 + [] entry_SYSCALL_64_after_hwframe+0x76/0x7e + ... + +This issue can be consistently reproduced on three different servers: + * a 448-core server + * a 256-core server + * a 192-core server + +[Root Cause] +Since multiple threads are created by the hackbench program (along with +the command argument 'thread'), a shared vma might be accessed by two or +more cores simultaneously. When two or more cores observe that +vma->numab_state is NULL at the same time, vma->numab_state will be +overwritten. + +Although current code ensures that only one thread scans the VMAs in a +single 'numa_scan_period', there might be a chance for another thread +to enter in the next 'numa_scan_period' while we have not gotten till +numab_state allocation [1]. + +Note that the command `/opt/ltp/testcases/bin/hackbench 50 process 1000` +cannot the reproduce the issue. It is verified with 200+ test runs. + +[Solution] +Use the cmpxchg atomic operation to ensure that only one thread executes +the vma->numab_state assignment. 
+ +[1] https://lore.kernel.org/lkml/1794be3c-358c-4cdc-a43d-a1f841d91ef7@amd.com/ + +Link: https://lkml.kernel.org/r/20241113102146.2384-1-ahuang12@lenovo.com +Fixes: ef6a22b70f6d ("sched/numa: apply the scan delay to every new vma") +Signed-off-by: Adrian Huang +Reported-by: Jiwei Sun +Reviewed-by: Raghavendra K T +Reviewed-by: Vlastimil Babka +Cc: Ben Segall +Cc: Dietmar Eggemann +Cc: Ingo Molnar +Cc: Juri Lelli +Cc: Mel Gorman +Cc: Peter Zijlstra +Cc: Steven Rostedt +Cc: Valentin Schneider +Cc: Vincent Guittot +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/fair.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -3399,10 +3399,16 @@ retry_pids: + + /* Initialise new per-VMA NUMAB state. */ + if (!vma->numab_state) { +- vma->numab_state = kzalloc(sizeof(struct vma_numab_state), +- GFP_KERNEL); +- if (!vma->numab_state) ++ struct vma_numab_state *ptr; ++ ++ ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); ++ if (!ptr) ++ continue; ++ ++ if (cmpxchg(&vma->numab_state, NULL, ptr)) { ++ kfree(ptr); + continue; ++ } + + vma->numab_state->start_scan_seq = mm->numa_scan_seq; + diff --git a/queue-6.12/scsi-ufs-pltfrm-drop-pm-runtime-reference-count-after-ufshcd_remove.patch b/queue-6.12/scsi-ufs-pltfrm-drop-pm-runtime-reference-count-after-ufshcd_remove.patch index 4e8b324ca87..f59af0632bd 100644 --- a/queue-6.12/scsi-ufs-pltfrm-drop-pm-runtime-reference-count-after-ufshcd_remove.patch +++ b/queue-6.12/scsi-ufs-pltfrm-drop-pm-runtime-reference-count-after-ufshcd_remove.patch @@ -26,19 +26,17 @@ Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- - drivers/ufs/host/tc-dwc-g210-pltfrm.c | 1 - - drivers/ufs/host/ufs-exynos.c | 1 - - drivers/ufs/host/ufs-mediatek.c | 1 - - drivers/ufs/host/ufs-qcom.c | 1 - - drivers/ufs/host/ufs-sprd.c | 1 - - drivers/ufs/host/ufshcd-pltfrm.c | 2 ++ + drivers/ufs/host/tc-dwc-g210-pltfrm.c | 1 - + drivers/ufs/host/ufs-exynos.c | 1 - + drivers/ufs/host/ufs-mediatek.c | 1 - + drivers/ufs/host/ufs-qcom.c | 1 - + drivers/ufs/host/ufs-sprd.c | 1 - + drivers/ufs/host/ufshcd-pltfrm.c | 2 ++ 6 files changed, 2 insertions(+), 5 deletions(-) -diff --git a/drivers/ufs/host/tc-dwc-g210-pltfrm.c b/drivers/ufs/host/tc-dwc-g210-pltfrm.c -index 113e0ef7b2cf..c6f8565ede21 100644 --- a/drivers/ufs/host/tc-dwc-g210-pltfrm.c +++ b/drivers/ufs/host/tc-dwc-g210-pltfrm.c -@@ -76,7 +76,6 @@ static int tc_dwc_g210_pltfm_probe(struct platform_device *pdev) +@@ -76,7 +76,6 @@ static int tc_dwc_g210_pltfm_probe(struc */ static void tc_dwc_g210_pltfm_remove(struct platform_device *pdev) { @@ -46,11 +44,9 @@ index 113e0ef7b2cf..c6f8565ede21 100644 ufshcd_pltfrm_remove(pdev); } -diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c -index b20f6526777a..9d4db13e142d 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c -@@ -1992,7 +1992,6 @@ static void exynos_ufs_remove(struct platform_device *pdev) +@@ -1963,7 +1963,6 @@ static void exynos_ufs_remove(struct pla struct ufs_hba *hba = platform_get_drvdata(pdev); struct exynos_ufs *ufs = ufshcd_get_variant(hba); @@ -58,11 +54,9 @@ index b20f6526777a..9d4db13e142d 100644 ufshcd_pltfrm_remove(pdev); phy_power_off(ufs->phy); -diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c -index b444146419de..ffe4d03a0f38 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c -@@ -1879,7 +1879,6 @@ static 
int ufs_mtk_probe(struct platform_device *pdev) +@@ -1869,7 +1869,6 @@ out: */ static void ufs_mtk_remove(struct platform_device *pdev) { @@ -70,11 +64,9 @@ index b444146419de..ffe4d03a0f38 100644 ufshcd_pltfrm_remove(pdev); } -diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c -index 3762337d7576..73b4fec8221a 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c -@@ -1863,7 +1863,6 @@ static void ufs_qcom_remove(struct platform_device *pdev) +@@ -1845,7 +1845,6 @@ static void ufs_qcom_remove(struct platf struct ufs_hba *hba = platform_get_drvdata(pdev); struct ufs_qcom_host *host = ufshcd_get_variant(hba); @@ -82,11 +74,9 @@ index 3762337d7576..73b4fec8221a 100644 ufshcd_pltfrm_remove(pdev); if (host->esi_enabled) platform_device_msi_free_irqs_all(hba->dev); -diff --git a/drivers/ufs/host/ufs-sprd.c b/drivers/ufs/host/ufs-sprd.c -index e455890cf7d4..d220978c2d8c 100644 --- a/drivers/ufs/host/ufs-sprd.c +++ b/drivers/ufs/host/ufs-sprd.c -@@ -427,7 +427,6 @@ static int ufs_sprd_probe(struct platform_device *pdev) +@@ -427,7 +427,6 @@ static int ufs_sprd_probe(struct platfor static void ufs_sprd_remove(struct platform_device *pdev) { @@ -94,11 +84,9 @@ index e455890cf7d4..d220978c2d8c 100644 ufshcd_pltfrm_remove(pdev); } -diff --git a/drivers/ufs/host/ufshcd-pltfrm.c b/drivers/ufs/host/ufshcd-pltfrm.c -index bad5b1303eb6..b8dadd0a2f4c 100644 --- a/drivers/ufs/host/ufshcd-pltfrm.c +++ b/drivers/ufs/host/ufshcd-pltfrm.c -@@ -532,8 +532,10 @@ void ufshcd_pltfrm_remove(struct platform_device *pdev) +@@ -532,8 +532,10 @@ void ufshcd_pltfrm_remove(struct platfor { struct ufs_hba *hba = platform_get_drvdata(pdev); @@ -109,6 +97,3 @@ index bad5b1303eb6..b8dadd0a2f4c 100644 } EXPORT_SYMBOL_GPL(ufshcd_pltfrm_remove); --- -2.47.1 - diff --git a/queue-6.12/series b/queue-6.12/series index 6769d92f6c1..04e9103c49b 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -180,7 +180,6 @@ drm-amdkfd-add-mec-version-that-supports-no-pcie-atomics-for-gfx12.patch drm-amd-pm-fix-and-simplify-workload-handling.patch drm-dp_mst-verify-request-type-in-the-corresponding-down-message-reply.patch drm-dp_mst-fix-resetting-msg-rx-state-after-topology-removal.patch -drm-amdgpu-rework-resume-handling-for-display-v2.patch drm-amd-display-correct-prefetch-calculation.patch drm-amd-display-limit-vtotal-range-to-max-hw-cap-minus-fp.patch drm-amd-display-add-a-left-edge-pixel-if-in-ycbcr422-or-ycbcr420-and-odm.patch @@ -201,4 +200,19 @@ arch_numa-restore-nid-checks-before-registering-a-memblock-with-a-node.patch mmc-sdhci-pci-add-dmi-quirk-for-missing-cd-gpio-on-vexia-edu-atla-10-tablet.patch mmc-core-further-prevent-card-detect-during-shutdown.patch x86-cpu-add-lunar-lake-to-list-of-cpus-with-a-broken-monitor-implementation.patch +ocfs2-update-seq_file-index-in-ocfs2_dlm_seq_next.patch +stackdepot-fix-stack_depot_save_flags-in-nmi-context.patch +lib-stackinit-hide-never-taken-branch-from-compiler.patch +sched-numa-fix-memory-leak-due-to-the-overwritten-vma-numab_state.patch +kasan-make-report_lock-a-raw-spinlock.patch +mm-gup-handle-null-pages-in-unpin_user_pages.patch +mm-mempolicy-fix-migrate_to_node-assuming-there-is-at-least-one-vma-in-a-mm.patch +x86-cpu-topology-remove-limit-of-cpus-due-to-disabled-io-apic.patch +x86-mm-add-_page_noptishadow-bit-to-avoid-updating-userspace-page-tables.patch +mm-damon-fix-order-of-arguments-in-damos_before_apply-tracepoint.patch +mm-memcg-declare-do_memsw_account-inline.patch +mm-open-code-pagetail-in-folio_flags-and-const_folio_flags.patch 
+mm-open-code-page_folio-in-dump_page.patch +mm-fix-vrealloc-s-kasan-poisoning-logic.patch +mm-respect-mmap-hint-address-when-aligning-for-thp.patch scsi-ufs-pltfrm-drop-pm-runtime-reference-count-after-ufshcd_remove.patch diff --git a/queue-6.12/stackdepot-fix-stack_depot_save_flags-in-nmi-context.patch b/queue-6.12/stackdepot-fix-stack_depot_save_flags-in-nmi-context.patch new file mode 100644 index 00000000000..a0c4182f2a4 --- /dev/null +++ b/queue-6.12/stackdepot-fix-stack_depot_save_flags-in-nmi-context.patch @@ -0,0 +1,87 @@ +From 031e04bdc834cda3b054ef6b698503b2b97e8186 Mon Sep 17 00:00:00 2001 +From: Marco Elver +Date: Fri, 22 Nov 2024 16:39:47 +0100 +Subject: stackdepot: fix stack_depot_save_flags() in NMI context + +From: Marco Elver + +commit 031e04bdc834cda3b054ef6b698503b2b97e8186 upstream. + +Per documentation, stack_depot_save_flags() was meant to be usable from +NMI context if STACK_DEPOT_FLAG_CAN_ALLOC is unset. However, it still +would try to take the pool_lock in an attempt to save a stack trace in the +current pool (if space is available). + +This could result in deadlock if an NMI is handled while pool_lock is +already held. To avoid deadlock, only try to take the lock in NMI context +and give up if unsuccessful. + +The documentation is fixed to clearly convey this. + +Link: https://lkml.kernel.org/r/Z0CcyfbPqmxJ9uJH@elver.google.com +Link: https://lkml.kernel.org/r/20241122154051.3914732-1-elver@google.com +Fixes: 4434a56ec209 ("stackdepot: make fast paths lock-less again") +Signed-off-by: Marco Elver +Reported-by: Sebastian Andrzej Siewior +Reviewed-by: Sebastian Andrzej Siewior +Cc: Alexander Potapenko +Cc: Andrey Konovalov +Cc: Dmitry Vyukov +Cc: Oscar Salvador +Cc: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/stackdepot.h | 6 +++--- + lib/stackdepot.c | 10 +++++++++- + 2 files changed, 12 insertions(+), 4 deletions(-) + +--- a/include/linux/stackdepot.h ++++ b/include/linux/stackdepot.h +@@ -147,7 +147,7 @@ static inline int stack_depot_early_init + * If the provided stack trace comes from the interrupt context, only the part + * up to the interrupt entry is saved. + * +- * Context: Any context, but setting STACK_DEPOT_FLAG_CAN_ALLOC is required if ++ * Context: Any context, but unsetting STACK_DEPOT_FLAG_CAN_ALLOC is required if + * alloc_pages() cannot be used from the current context. Currently + * this is the case for contexts where neither %GFP_ATOMIC nor + * %GFP_NOWAIT can be used (NMI, raw_spin_lock). +@@ -156,7 +156,7 @@ static inline int stack_depot_early_init + */ + depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, + unsigned int nr_entries, +- gfp_t gfp_flags, ++ gfp_t alloc_flags, + depot_flags_t depot_flags); + + /** +@@ -175,7 +175,7 @@ depot_stack_handle_t stack_depot_save_fl + * Return: Handle of the stack trace stored in depot, 0 on failure + */ + depot_stack_handle_t stack_depot_save(unsigned long *entries, +- unsigned int nr_entries, gfp_t gfp_flags); ++ unsigned int nr_entries, gfp_t alloc_flags); + + /** + * __stack_depot_get_stack_record - Get a pointer to a stack_record struct +--- a/lib/stackdepot.c ++++ b/lib/stackdepot.c +@@ -630,7 +630,15 @@ depot_stack_handle_t stack_depot_save_fl + prealloc = page_address(page); + } + +- raw_spin_lock_irqsave(&pool_lock, flags); ++ if (in_nmi()) { ++ /* We can never allocate in NMI context. */ ++ WARN_ON_ONCE(can_alloc); ++ /* Best effort; bail if we fail to take the lock. 
*/ ++ if (!raw_spin_trylock_irqsave(&pool_lock, flags)) ++ goto exit; ++ } else { ++ raw_spin_lock_irqsave(&pool_lock, flags); ++ } + printk_deferred_enter(); + + /* Try to find again, to avoid concurrently inserting duplicates. */ diff --git a/queue-6.12/x86-cpu-topology-remove-limit-of-cpus-due-to-disabled-io-apic.patch b/queue-6.12/x86-cpu-topology-remove-limit-of-cpus-due-to-disabled-io-apic.patch new file mode 100644 index 00000000000..d3b35520b0f --- /dev/null +++ b/queue-6.12/x86-cpu-topology-remove-limit-of-cpus-due-to-disabled-io-apic.patch @@ -0,0 +1,48 @@ +From 73da582a476ea6e3512f89f8ed57dfed945829a2 Mon Sep 17 00:00:00 2001 +From: Fernando Fernandez Mancera +Date: Mon, 2 Dec 2024 14:58:45 +0000 +Subject: x86/cpu/topology: Remove limit of CPUs due to disabled IO/APIC + +From: Fernando Fernandez Mancera + +commit 73da582a476ea6e3512f89f8ed57dfed945829a2 upstream. + +The rework of possible CPUs management erroneously disabled SMP when the +IO/APIC is disabled either by the 'noapic' command line parameter or during +IO/APIC setup. SMP is possible without IO/APIC. + +Remove the ioapic_is_disabled conditions from the relevant possible CPU +management code paths to restore the orgininal behaviour. + +Fixes: 7c0edad3643f ("x86/cpu/topology: Rework possible CPU management") +Signed-off-by: Fernando Fernandez Mancera +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/all/20241202145905.1482-1-ffmancera@riseup.net +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/topology.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/x86/kernel/cpu/topology.c ++++ b/arch/x86/kernel/cpu/topology.c +@@ -428,8 +428,8 @@ void __init topology_apply_cmdline_limit + { + unsigned int possible = nr_cpu_ids; + +- /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' 'noapic' */ +- if (!setup_max_cpus || ioapic_is_disabled || apic_is_disabled) ++ /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' */ ++ if (!setup_max_cpus || apic_is_disabled) + possible = 1; + + /* 'possible_cpus=N' */ +@@ -443,7 +443,7 @@ void __init topology_apply_cmdline_limit + + static __init bool restrict_to_up(void) + { +- if (!smp_found_config || ioapic_is_disabled) ++ if (!smp_found_config) + return true; + /* + * XEN PV is special as it does not advertise the local APIC diff --git a/queue-6.12/x86-mm-add-_page_noptishadow-bit-to-avoid-updating-userspace-page-tables.patch b/queue-6.12/x86-mm-add-_page_noptishadow-bit-to-avoid-updating-userspace-page-tables.patch new file mode 100644 index 00000000000..2593a86d3db --- /dev/null +++ b/queue-6.12/x86-mm-add-_page_noptishadow-bit-to-avoid-updating-userspace-page-tables.patch @@ -0,0 +1,114 @@ +From d0ceea662d459726487030237689835fcc0483e5 Mon Sep 17 00:00:00 2001 +From: David Woodhouse +Date: Wed, 4 Dec 2024 11:27:14 +0000 +Subject: x86/mm: Add _PAGE_NOPTISHADOW bit to avoid updating userspace page tables + +From: David Woodhouse + +commit d0ceea662d459726487030237689835fcc0483e5 upstream. + +The set_p4d() and set_pgd() functions (in 4-level or 5-level page table setups +respectively) assume that the root page table is actually a 8KiB allocation, +with the userspace root immediately after the kernel root page table (so that +the former can enforce NX on on all the subordinate page tables, which are +actually shared). + +However, users of the kernel_ident_mapping_init() code do not give it an 8KiB +allocation for its PGD. Both swsusp_arch_resume() and acpi_mp_setup_reset() +allocate only a single 4KiB page. 
The kexec code on x86_64 currently gets +away with it purely by chance, because it allocates 8KiB for its "control +code page" and then actually uses the first half for the PGD, then copies the +actual trampoline code into the second half only after the identmap code has +finished scribbling over it. + +Fix this by defining a _PAGE_NOPTISHADOW bit (which can use the same bit as +_PAGE_SAVED_DIRTY since one is only for the PGD/P4D root and the other is +exclusively for leaf PTEs.). This instructs __pti_set_user_pgtbl() not to +write to the userspace 'shadow' PGD. + +Strictly, the _PAGE_NOPTISHADOW bit doesn't need to be written out to the +actual page tables; since __pti_set_user_pgtbl() returns the value to be +written to the kernel page table, it could be filtered out. But there seems +to be no benefit to actually doing so. + +Suggested-by: Dave Hansen +Signed-off-by: David Woodhouse +Signed-off-by: Ingo Molnar +Link: https://lore.kernel.org/r/412c90a4df7aef077141d9f68d19cbe5602d6c6d.camel@infradead.org +Cc: stable@kernel.org +Cc: Linus Torvalds +Cc: Andy Lutomirski +Cc: Peter Zijlstra +Cc: Rik van Riel +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/pgtable_types.h | 8 ++++++-- + arch/x86/mm/ident_map.c | 6 +++--- + arch/x86/mm/pti.c | 2 +- + 3 files changed, 10 insertions(+), 6 deletions(-) + +--- a/arch/x86/include/asm/pgtable_types.h ++++ b/arch/x86/include/asm/pgtable_types.h +@@ -36,10 +36,12 @@ + #define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 + + #ifdef CONFIG_X86_64 +-#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit */ ++#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit (leaf) */ ++#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW5 /* No PTI shadow (root PGD) */ + #else + /* Shared with _PAGE_BIT_UFFD_WP which is not supported on 32 bit */ +-#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit */ ++#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit (leaf) */ ++#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW2 /* No PTI shadow (root PGD) */ + #endif + + /* If _PAGE_BIT_PRESENT is clear, we use these: */ +@@ -139,6 +141,8 @@ + + #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) + ++#define _PAGE_NOPTISHADOW (_AT(pteval_t, 1) << _PAGE_BIT_NOPTISHADOW) ++ + /* + * Set of bits not changed in pte_modify. The pte's + * protection key is treated like _PAGE_RW, for +--- a/arch/x86/mm/ident_map.c ++++ b/arch/x86/mm/ident_map.c +@@ -174,7 +174,7 @@ static int ident_p4d_init(struct x86_map + if (result) + return result; + +- set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag)); ++ set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag | _PAGE_NOPTISHADOW)); + } + + return 0; +@@ -218,14 +218,14 @@ int kernel_ident_mapping_init(struct x86 + if (result) + return result; + if (pgtable_l5_enabled()) { +- set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag)); ++ set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag | _PAGE_NOPTISHADOW)); + } else { + /* + * With p4d folded, pgd is equal to p4d. + * The pgd entry has to point to the pud page table in this case. + */ + pud_t *pud = pud_offset(p4d, 0); +- set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag)); ++ set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag | _PAGE_NOPTISHADOW)); + } + } + +--- a/arch/x86/mm/pti.c ++++ b/arch/x86/mm/pti.c +@@ -132,7 +132,7 @@ pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, + * Top-level entries added to init_mm's usermode pgd after boot + * will not be automatically propagated to other mms. 
+ */ +- if (!pgdp_maps_userspace(pgdp)) ++ if (!pgdp_maps_userspace(pgdp) || (pgd.pgd & _PAGE_NOPTISHADOW)) + return pgd; + + /*