6.12-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Tue, 10 Dec 2024 09:47:06 +0000 (10:47 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Tue, 10 Dec 2024 09:47:06 +0000 (10:47 +0100)
added patches:
kasan-make-report_lock-a-raw-spinlock.patch
lib-stackinit-hide-never-taken-branch-from-compiler.patch
mm-damon-fix-order-of-arguments-in-damos_before_apply-tracepoint.patch
mm-fix-vrealloc-s-kasan-poisoning-logic.patch
mm-gup-handle-null-pages-in-unpin_user_pages.patch
mm-memcg-declare-do_memsw_account-inline.patch
mm-mempolicy-fix-migrate_to_node-assuming-there-is-at-least-one-vma-in-a-mm.patch
mm-open-code-page_folio-in-dump_page.patch
mm-open-code-pagetail-in-folio_flags-and-const_folio_flags.patch
mm-respect-mmap-hint-address-when-aligning-for-thp.patch
ocfs2-update-seq_file-index-in-ocfs2_dlm_seq_next.patch
sched-numa-fix-memory-leak-due-to-the-overwritten-vma-numab_state.patch
stackdepot-fix-stack_depot_save_flags-in-nmi-context.patch
x86-cpu-topology-remove-limit-of-cpus-due-to-disabled-io-apic.patch
x86-mm-add-_page_noptishadow-bit-to-avoid-updating-userspace-page-tables.patch

17 files changed:
queue-6.12/kasan-make-report_lock-a-raw-spinlock.patch [new file with mode: 0644]
queue-6.12/lib-stackinit-hide-never-taken-branch-from-compiler.patch [new file with mode: 0644]
queue-6.12/mm-damon-fix-order-of-arguments-in-damos_before_apply-tracepoint.patch [new file with mode: 0644]
queue-6.12/mm-fix-vrealloc-s-kasan-poisoning-logic.patch [new file with mode: 0644]
queue-6.12/mm-gup-handle-null-pages-in-unpin_user_pages.patch [new file with mode: 0644]
queue-6.12/mm-memcg-declare-do_memsw_account-inline.patch [new file with mode: 0644]
queue-6.12/mm-mempolicy-fix-migrate_to_node-assuming-there-is-at-least-one-vma-in-a-mm.patch [new file with mode: 0644]
queue-6.12/mm-open-code-page_folio-in-dump_page.patch [new file with mode: 0644]
queue-6.12/mm-open-code-pagetail-in-folio_flags-and-const_folio_flags.patch [new file with mode: 0644]
queue-6.12/mm-respect-mmap-hint-address-when-aligning-for-thp.patch [new file with mode: 0644]
queue-6.12/ocfs2-update-seq_file-index-in-ocfs2_dlm_seq_next.patch [new file with mode: 0644]
queue-6.12/sched-numa-fix-memory-leak-due-to-the-overwritten-vma-numab_state.patch [new file with mode: 0644]
queue-6.12/scsi-ufs-pltfrm-drop-pm-runtime-reference-count-after-ufshcd_remove.patch
queue-6.12/series
queue-6.12/stackdepot-fix-stack_depot_save_flags-in-nmi-context.patch [new file with mode: 0644]
queue-6.12/x86-cpu-topology-remove-limit-of-cpus-due-to-disabled-io-apic.patch [new file with mode: 0644]
queue-6.12/x86-mm-add-_page_noptishadow-bit-to-avoid-updating-userspace-page-tables.patch [new file with mode: 0644]

diff --git a/queue-6.12/kasan-make-report_lock-a-raw-spinlock.patch b/queue-6.12/kasan-make-report_lock-a-raw-spinlock.patch
new file mode 100644
index 0000000..3af3b01
--- /dev/null
@@ -0,0 +1,66 @@
+From e30a0361b8515d424c73c67de1a43e45a13b8ba2 Mon Sep 17 00:00:00 2001
+From: Jared Kangas <jkangas@redhat.com>
+Date: Tue, 19 Nov 2024 13:02:34 -0800
+Subject: kasan: make report_lock a raw spinlock
+
+From: Jared Kangas <jkangas@redhat.com>
+
+commit e30a0361b8515d424c73c67de1a43e45a13b8ba2 upstream.
+
+If PREEMPT_RT is enabled, report_lock is a sleeping spinlock and must not
+be locked when IRQs are disabled.  However, KASAN reports may be triggered
+in such contexts.  For example:
+
+        char *s = kzalloc(1, GFP_KERNEL);
+        kfree(s);
+        local_irq_disable();
+        char c = *s;  /* KASAN report here leads to spin_lock() */
+        local_irq_enable();
+
+Make report_lock a raw spinlock to prevent rescheduling when
+PREEMPT_RT is enabled.
+
+Link: https://lkml.kernel.org/r/20241119210234.1602529-1-jkangas@redhat.com
+Fixes: 342a93247e08 ("locking/spinlock: Provide RT variant header: <linux/spinlock_rt.h>")
+Signed-off-by: Jared Kangas <jkangas@redhat.com>
+Cc: Alexander Potapenko <glider@google.com>
+Cc: Andrey Konovalov <andreyknvl@gmail.com>
+Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/kasan/report.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/mm/kasan/report.c
++++ b/mm/kasan/report.c
+@@ -200,7 +200,7 @@ static inline void fail_non_kasan_kunit_
+ #endif /* CONFIG_KUNIT */
+-static DEFINE_SPINLOCK(report_lock);
++static DEFINE_RAW_SPINLOCK(report_lock);
+ static void start_report(unsigned long *flags, bool sync)
+ {
+@@ -211,7 +211,7 @@ static void start_report(unsigned long *
+       lockdep_off();
+       /* Make sure we don't end up in loop. */
+       report_suppress_start();
+-      spin_lock_irqsave(&report_lock, *flags);
++      raw_spin_lock_irqsave(&report_lock, *flags);
+       pr_err("==================================================================\n");
+ }
+@@ -221,7 +221,7 @@ static void end_report(unsigned long *fl
+               trace_error_report_end(ERROR_DETECTOR_KASAN,
+                                      (unsigned long)addr);
+       pr_err("==================================================================\n");
+-      spin_unlock_irqrestore(&report_lock, *flags);
++      raw_spin_unlock_irqrestore(&report_lock, *flags);
+       if (!test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags))
+               check_panic_on_warn("KASAN");
+       switch (kasan_arg_fault) {
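
The reason a raw spinlock is required here: on PREEMPT_RT a plain
spinlock_t is a sleeping rtmutex, while raw_spinlock_t always spins and
is therefore safe with interrupts disabled. A minimal sketch of the
distinction (illustrative names, not part of the patch):

    #include <linux/spinlock.h>

    /* raw_spinlock_t always spins, even on PREEMPT_RT where a plain
     * spinlock_t becomes a sleeping rtmutex. */
    static DEFINE_RAW_SPINLOCK(atomic_lock);

    static void report_from_atomic_context(void)
    {
            unsigned long flags;

            /* Safe even when the caller has already disabled IRQs;
             * spin_lock_irqsave() on a spinlock_t could sleep here on
             * PREEMPT_RT, which is exactly the bug being fixed. */
            raw_spin_lock_irqsave(&atomic_lock, flags);
            /* ... emit the report ... */
            raw_spin_unlock_irqrestore(&atomic_lock, flags);
    }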
diff --git a/queue-6.12/lib-stackinit-hide-never-taken-branch-from-compiler.patch b/queue-6.12/lib-stackinit-hide-never-taken-branch-from-compiler.patch
new file mode 100644
index 0000000..e8d52b6
--- /dev/null
@@ -0,0 +1,40 @@
+From 5c3793604f91123bf49bc792ce697a0bef4c173c Mon Sep 17 00:00:00 2001
+From: Kees Cook <kees@kernel.org>
+Date: Sun, 17 Nov 2024 03:38:13 -0800
+Subject: lib: stackinit: hide never-taken branch from compiler
+
+From: Kees Cook <kees@kernel.org>
+
+commit 5c3793604f91123bf49bc792ce697a0bef4c173c upstream.
+
+The never-taken branch leads to an invalid bounds condition, which is by
+design. To avoid the unwanted warning from the compiler, hide the
+variable from the optimizer.
+
+../lib/stackinit_kunit.c: In function 'do_nothing_u16_zero':
+../lib/stackinit_kunit.c:51:49: error: array subscript 1 is outside array bounds of 'u16[0]' {aka 'short unsigned int[]'} [-Werror=array-bounds=]
+   51 | #define DO_NOTHING_RETURN_SCALAR(ptr)           *(ptr)
+      |                                                 ^~~~~~
+../lib/stackinit_kunit.c:219:24: note: in expansion of macro 'DO_NOTHING_RETURN_SCALAR'
+  219 |                 return DO_NOTHING_RETURN_ ## which(ptr + 1);    \
+      |                        ^~~~~~~~~~~~~~~~~~
+
+Link: https://lkml.kernel.org/r/20241117113813.work.735-kees@kernel.org
+Signed-off-by: Kees Cook <kees@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ lib/stackinit_kunit.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/lib/stackinit_kunit.c
++++ b/lib/stackinit_kunit.c
+@@ -212,6 +212,7 @@ static noinline void test_ ## name (stru
+ static noinline DO_NOTHING_TYPE_ ## which(var_type)           \
+ do_nothing_ ## name(var_type *ptr)                            \
+ {                                                             \
++      OPTIMIZER_HIDE_VAR(ptr);                                \
+       /* Will always be true, but compiler doesn't know. */   \
+       if ((unsigned long)ptr > 0x2)                           \
+               return DO_NOTHING_RETURN_ ## which(ptr);        \
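
OPTIMIZER_HIDE_VAR() works by laundering the variable through an empty
asm statement, so the optimizer can no longer reason about its value or
provenance and the array-bounds warning disappears. A hedged sketch of
the mechanism (the real macro lives in include/linux/compiler.h):

    /* Roughly what the macro does: an empty asm that claims to modify
     * the variable, making its value opaque to the optimizer. */
    #define HIDE_VAR(var) __asm__ ("" : "+r" (var))

    static unsigned short read_one_past(unsigned short *ptr)
    {
            HIDE_VAR(ptr);
            /* After hiding, the compiler cannot trace ptr back to a
             * zero-length array, so -Warray-bounds stays quiet. */
            if ((unsigned long)ptr > 0x2)   /* always true at runtime */
                    return *(ptr + 1);
            return 0;
    }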
diff --git a/queue-6.12/mm-damon-fix-order-of-arguments-in-damos_before_apply-tracepoint.patch b/queue-6.12/mm-damon-fix-order-of-arguments-in-damos_before_apply-tracepoint.patch
new file mode 100644
index 0000000..d5a58b7
--- /dev/null
@@ -0,0 +1,43 @@
+From 6535b8669c1a74078098517174e53fc907ce9d56 Mon Sep 17 00:00:00 2001
+From: Akinobu Mita <akinobu.mita@gmail.com>
+Date: Fri, 15 Nov 2024 10:20:23 -0800
+Subject: mm/damon: fix order of arguments in damos_before_apply tracepoint
+
+From: Akinobu Mita <akinobu.mita@gmail.com>
+
+commit 6535b8669c1a74078098517174e53fc907ce9d56 upstream.
+
+Since the order of the scheme_idx and target_idx arguments in TP_ARGS is
+reversed, each value is stored in the other's slot in the trace record.
+
+Link: https://lkml.kernel.org/r/20241115182023.43118-1-sj@kernel.org
+Link: https://patch.msgid.link/20241112154828.40307-1-akinobu.mita@gmail.com
+Fixes: c603c630b509 ("mm/damon/core: add a tracepoint for damos apply target regions")
+Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/trace/events/damon.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h
+index 23200aabccac..da4bd9fd1162 100644
+--- a/include/trace/events/damon.h
++++ b/include/trace/events/damon.h
+@@ -15,7 +15,7 @@ TRACE_EVENT_CONDITION(damos_before_apply,
+               unsigned int target_idx, struct damon_region *r,
+               unsigned int nr_regions, bool do_trace),
+-      TP_ARGS(context_idx, target_idx, scheme_idx, r, nr_regions, do_trace),
++      TP_ARGS(context_idx, scheme_idx, target_idx, r, nr_regions, do_trace),
+       TP_CONDITION(do_trace),
+-- 
+2.47.1
+
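
The bug class is easy to hit because TP_ARGS binds the tracepoint's
call-site arguments to the TP_PROTO parameters purely by position. An
illustrative skeleton (hypothetical event, not the DAMON one):

    TRACE_EVENT(example_before_apply,
            TP_PROTO(unsigned int ctx_idx, unsigned int scheme_idx,
                     unsigned int target_idx),
            /* Must mirror TP_PROTO's order exactly: swapping scheme_idx
             * and target_idx compiles fine but records each value in
             * the other's slot. */
            TP_ARGS(ctx_idx, scheme_idx, target_idx),
            TP_STRUCT__entry(
                    __field(unsigned int, ctx_idx)
                    __field(unsigned int, scheme_idx)
                    __field(unsigned int, target_idx)
            ),
            TP_fast_assign(
                    __entry->ctx_idx = ctx_idx;
                    __entry->scheme_idx = scheme_idx;
                    __entry->target_idx = target_idx;
            ),
            TP_printk("ctx=%u scheme=%u target=%u", __entry->ctx_idx,
                      __entry->scheme_idx, __entry->target_idx)
    );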
diff --git a/queue-6.12/mm-fix-vrealloc-s-kasan-poisoning-logic.patch b/queue-6.12/mm-fix-vrealloc-s-kasan-poisoning-logic.patch
new file mode 100644
index 0000000..658c147
--- /dev/null
@@ -0,0 +1,54 @@
+From d699440f58ce9bd71103cc7b692e3ab76a20bfcd Mon Sep 17 00:00:00 2001
+From: Andrii Nakryiko <andrii@kernel.org>
+Date: Mon, 25 Nov 2024 16:52:06 -0800
+Subject: mm: fix vrealloc()'s KASAN poisoning logic
+
+From: Andrii Nakryiko <andrii@kernel.org>
+
+commit d699440f58ce9bd71103cc7b692e3ab76a20bfcd upstream.
+
+When vrealloc() reuses already allocated vmap_area, we need to re-annotate
+poisoned and unpoisoned portions of underlying memory according to the new
+size.
+
+Failing to do so results in the KASAN splat recorded at [1]: KASAN
+mis-reports an issue where there is none.
+
+Note, hard-coding KASAN_VMALLOC_PROT_NORMAL might not be exactly correct,
+but KASAN flag logic is pretty involved and spread out throughout
+__vmalloc_node_range_noprof(), so I'm using the bare minimum flag here and
+leaving the rest to mm people to refactor this logic and reuse it here.
+
+Link: https://lkml.kernel.org/r/20241126005206.3457974-1-andrii@kernel.org
+Link: https://lore.kernel.org/bpf/67450f9b.050a0220.21d33d.0004.GAE@google.com/ [1]
+Fixes: 3ddc2fefe6f3 ("mm: vmalloc: implement vrealloc()")
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Cc: Alexei Starovoitov <ast@kernel.org>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/vmalloc.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/mm/vmalloc.c b/mm/vmalloc.c
+index 7ed39d104201..f009b21705c1 100644
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -4093,7 +4093,8 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags)
+               /* Zero out spare memory. */
+               if (want_init_on_alloc(flags))
+                       memset((void *)p + size, 0, old_size - size);
+-
++              kasan_poison_vmalloc(p + size, old_size - size);
++              kasan_unpoison_vmalloc(p, size, KASAN_VMALLOC_PROT_NORMAL);
+               return (void *)p;
+       }
+-- 
+2.47.1
+
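
For context, the two annotation calls divide the reused region at the
new size: everything past it becomes invalid to touch, everything up to
it becomes valid. A minimal sketch of the shrink-in-place case from the
hunk above:

    /* Reusing an existing vmalloc allocation at a smaller size: */
    kasan_poison_vmalloc(p + size, old_size - size);  /* tail: off-limits */
    kasan_unpoison_vmalloc(p, size, KASAN_VMALLOC_PROT_NORMAL);  /* live */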
diff --git a/queue-6.12/mm-gup-handle-null-pages-in-unpin_user_pages.patch b/queue-6.12/mm-gup-handle-null-pages-in-unpin_user_pages.patch
new file mode 100644
index 0000000..5f0f8af
--- /dev/null
@@ -0,0 +1,98 @@
+From a1268be280d8e484ab3606d7476edd0f14bb9961 Mon Sep 17 00:00:00 2001
+From: John Hubbard <jhubbard@nvidia.com>
+Date: Wed, 20 Nov 2024 19:49:33 -0800
+Subject: mm/gup: handle NULL pages in unpin_user_pages()
+
+From: John Hubbard <jhubbard@nvidia.com>
+
+commit a1268be280d8e484ab3606d7476edd0f14bb9961 upstream.
+
+The recent addition of "pofs" (pages or folios) handling to gup has a
+flaw: it assumes that unpin_user_pages() handles NULL pages in the pages**
+array.  That's not the case, as I discovered when I ran on a new
+configuration on my test machine.
+
+Fix this by skipping NULL pages in unpin_user_pages(), just like
+unpin_folios() already does.
+
+Details: when booting on x86 with "numa=fake=2 movablecore=4G" on Linux
+6.12, and running this:
+
+    tools/testing/selftests/mm/gup_longterm
+
+...I get the following crash:
+
+BUG: kernel NULL pointer dereference, address: 0000000000000008
+RIP: 0010:sanity_check_pinned_pages+0x3a/0x2d0
+...
+Call Trace:
+ <TASK>
+ ? __die_body+0x66/0xb0
+ ? page_fault_oops+0x30c/0x3b0
+ ? do_user_addr_fault+0x6c3/0x720
+ ? irqentry_enter+0x34/0x60
+ ? exc_page_fault+0x68/0x100
+ ? asm_exc_page_fault+0x22/0x30
+ ? sanity_check_pinned_pages+0x3a/0x2d0
+ unpin_user_pages+0x24/0xe0
+ check_and_migrate_movable_pages_or_folios+0x455/0x4b0
+ __gup_longterm_locked+0x3bf/0x820
+ ? mmap_read_lock_killable+0x12/0x50
+ ? __pfx_mmap_read_lock_killable+0x10/0x10
+ pin_user_pages+0x66/0xa0
+ gup_test_ioctl+0x358/0xb20
+ __se_sys_ioctl+0x6b/0xc0
+ do_syscall_64+0x7b/0x150
+ entry_SYSCALL_64_after_hwframe+0x76/0x7e
+
+Link: https://lkml.kernel.org/r/20241121034933.77502-1-jhubbard@nvidia.com
+Fixes: 94efde1d1539 ("mm/gup: avoid an unnecessary allocation call for FOLL_LONGTERM cases")
+Signed-off-by: John Hubbard <jhubbard@nvidia.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Vivek Kasireddy <vivek.kasireddy@intel.com>
+Cc: Dave Airlie <airlied@redhat.com>
+Cc: Gerd Hoffmann <kraxel@redhat.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Jason Gunthorpe <jgg@nvidia.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
+Cc: Dongwon Kim <dongwon.kim@intel.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Junxiao Chang <junxiao.chang@intel.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/gup.c |   11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -52,7 +52,12 @@ static inline void sanity_check_pinned_p
+        */
+       for (; npages; npages--, pages++) {
+               struct page *page = *pages;
+-              struct folio *folio = page_folio(page);
++              struct folio *folio;
++
++              if (!page)
++                      continue;
++
++              folio = page_folio(page);
+               if (is_zero_page(page) ||
+                   !folio_test_anon(folio))
+@@ -409,6 +414,10 @@ void unpin_user_pages(struct page **page
+       sanity_check_pinned_pages(pages, npages);
+       for (i = 0; i < npages; i += nr) {
++              if (!pages[i]) {
++                      nr = 1;
++                      continue;
++              }
+               folio = gup_folio_next(pages, npages, i, &nr);
+               gup_put_folio(folio, nr, FOLL_PIN);
+       }
diff --git a/queue-6.12/mm-memcg-declare-do_memsw_account-inline.patch b/queue-6.12/mm-memcg-declare-do_memsw_account-inline.patch
new file mode 100644
index 0000000..10abaa0
--- /dev/null
@@ -0,0 +1,50 @@
+From 89dd878282881306c38f7e354e7614fca98cb9a6 Mon Sep 17 00:00:00 2001
+From: John Sperbeck <jsperbeck@google.com>
+Date: Thu, 28 Nov 2024 12:39:59 -0800
+Subject: mm: memcg: declare do_memsw_account inline
+
+From: John Sperbeck <jsperbeck@google.com>
+
+commit 89dd878282881306c38f7e354e7614fca98cb9a6 upstream.
+
+In commit 66d60c428b23 ("mm: memcg: move legacy memcg event code into
+memcontrol-v1.c"), the static do_memsw_account() function was moved from a
+.c file to a .h file.  Unfortunately, the traditional inline keyword
+wasn't added.  If a file (e.g., a unit test) includes the .h file, but
+doesn't refer to do_memsw_account(), it will get a warning like:
+
+mm/memcontrol-v1.h:41:13: warning: unused function 'do_memsw_account' [-Wunused-function]
+   41 | static bool do_memsw_account(void)
+      |             ^~~~~~~~~~~~~~~~
+
+Link: https://lkml.kernel.org/r/20241128203959.726527-1-jsperbeck@google.com
+Fixes: 66d60c428b23 ("mm: memcg: move legacy memcg event code into memcontrol-v1.c")
+Signed-off-by: John Sperbeck <jsperbeck@google.com>
+Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Muchun Song <muchun.song@linux.dev>
+Cc: Shakeel Butt <shakeel.butt@linux.dev>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memcontrol-v1.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h
+index 0e3b82951d91..144d71b65907 100644
+--- a/mm/memcontrol-v1.h
++++ b/mm/memcontrol-v1.h
+@@ -38,7 +38,7 @@ void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n);
+            iter = mem_cgroup_iter(NULL, iter, NULL))
+ /* Whether legacy memory+swap accounting is active */
+-static bool do_memsw_account(void)
++static inline bool do_memsw_account(void)
+ {
+       return !cgroup_subsys_on_dfl(memory_cgrp_subsys);
+ }
+-- 
+2.47.1
+
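
The warning is easy to reproduce: a plain static function defined in a
header is a separate private definition in every translation unit that
includes it, and any TU that never calls it trips -Wunused-function,
whereas static inline marks unused copies as intentional. A minimal
sketch (hypothetical header):

    /* example.h (hypothetical) */
    static bool helper(void)            /* warns in any TU that includes
                                           the header but never calls it */
    {
            return true;
    }

    static inline bool helper_ok(void)  /* unused copies silently dropped */
    {
            return true;
    }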
diff --git a/queue-6.12/mm-mempolicy-fix-migrate_to_node-assuming-there-is-at-least-one-vma-in-a-mm.patch b/queue-6.12/mm-mempolicy-fix-migrate_to_node-assuming-there-is-at-least-one-vma-in-a-mm.patch
new file mode 100644
index 0000000..ece2b00
--- /dev/null
@@ -0,0 +1,74 @@
+From 091c1dd2d4df6edd1beebe0e5863d4034ade9572 Mon Sep 17 00:00:00 2001
+From: David Hildenbrand <david@redhat.com>
+Date: Wed, 20 Nov 2024 21:11:51 +0100
+Subject: mm/mempolicy: fix migrate_to_node() assuming there is at least one VMA in a MM
+
+From: David Hildenbrand <david@redhat.com>
+
+commit 091c1dd2d4df6edd1beebe0e5863d4034ade9572 upstream.
+
+We currently assume that there is at least one VMA in a MM, which isn't
+true.
+
+So we might end up having find_vma() return NULL and then dereferencing
+NULL.  Properly handle find_vma() returning NULL.
+
+This fixes the report:
+
+Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN PTI
+KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
+CPU: 1 UID: 0 PID: 6021 Comm: syz-executor284 Not tainted 6.12.0-rc7-syzkaller-00187-gf868cd251776 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/30/2024
+RIP: 0010:migrate_to_node mm/mempolicy.c:1090 [inline]
+RIP: 0010:do_migrate_pages+0x403/0x6f0 mm/mempolicy.c:1194
+Code: ...
+RSP: 0018:ffffc9000375fd08 EFLAGS: 00010246
+RAX: 0000000000000000 RBX: ffffc9000375fd78 RCX: 0000000000000000
+RDX: ffff88807e171300 RSI: dffffc0000000000 RDI: ffff88803390c044
+RBP: ffff88807e171428 R08: 0000000000000014 R09: fffffbfff2039ef1
+R10: ffffffff901cf78f R11: 0000000000000000 R12: 0000000000000003
+R13: ffffc9000375fe90 R14: ffffc9000375fe98 R15: ffffc9000375fdf8
+FS:  00005555919e1380(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00005555919e1ca8 CR3: 000000007f12a000 CR4: 00000000003526f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <TASK>
+ kernel_migrate_pages+0x5b2/0x750 mm/mempolicy.c:1709
+ __do_sys_migrate_pages mm/mempolicy.c:1727 [inline]
+ __se_sys_migrate_pages mm/mempolicy.c:1723 [inline]
+ __x64_sys_migrate_pages+0x96/0x100 mm/mempolicy.c:1723
+ do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+ do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+[akpm@linux-foundation.org: add unlikely()]
+Link: https://lkml.kernel.org/r/20241120201151.9518-1-david@redhat.com
+Fixes: 39743889aaf7 ("[PATCH] Swap Migration V5: sys_migrate_pages interface")
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Reported-by: syzbot+3511625422f7aa637f0d@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/lkml/673d2696.050a0220.3c9d61.012f.GAE@google.com/T/
+Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
+Reviewed-by: Christoph Lameter <cl@linux.com>
+Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mempolicy.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -1080,6 +1080,10 @@ static long migrate_to_node(struct mm_st
+       mmap_read_lock(mm);
+       vma = find_vma(mm, 0);
++      if (unlikely(!vma)) {
++              mmap_read_unlock(mm);
++              return 0;
++      }
+       /*
+        * This does not migrate the range, but isolates all pages that
diff --git a/queue-6.12/mm-open-code-page_folio-in-dump_page.patch b/queue-6.12/mm-open-code-page_folio-in-dump_page.patch
new file mode 100644
index 0000000..44a26d0
--- /dev/null
@@ -0,0 +1,60 @@
+From 6a7de1bf218d75f27f68d6a3f5ae1eb7332b941e Mon Sep 17 00:00:00 2001
+From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Date: Mon, 25 Nov 2024 20:17:19 +0000
+Subject: mm: open-code page_folio() in dump_page()
+
+From: Matthew Wilcox (Oracle) <willy@infradead.org>
+
+commit 6a7de1bf218d75f27f68d6a3f5ae1eb7332b941e upstream.
+
+page_folio() calls page_fixed_fake_head(), which will misidentify this
+page as a fake head and load off the end of 'precise'.  We may have a
+pointer to a fake head, but that's OK because it contains the right
+information for dump_page().
+
+gcc-15 is smart enough to catch this with -Warray-bounds:
+
+In function 'page_fixed_fake_head',
+    inlined from '_compound_head' at ../include/linux/page-flags.h:251:24,
+    inlined from '__dump_page' at ../mm/debug.c:123:11:
+../include/asm-generic/rwonce.h:44:26: warning: array subscript 9 is outside
++array bounds of 'struct page[1]' [-Warray-bounds=]
+
+Link: https://lkml.kernel.org/r/20241125201721.2963278-2-willy@infradead.org
+Fixes: fae7d834c43c ("mm: add __dump_folio()")
+Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Reported-by: Kees Cook <kees@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/debug.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/mm/debug.c
++++ b/mm/debug.c
+@@ -124,19 +124,22 @@ static void __dump_page(const struct pag
+ {
+       struct folio *foliop, folio;
+       struct page precise;
++      unsigned long head;
+       unsigned long pfn = page_to_pfn(page);
+       unsigned long idx, nr_pages = 1;
+       int loops = 5;
+ again:
+       memcpy(&precise, page, sizeof(*page));
+-      foliop = page_folio(&precise);
+-      if (foliop == (struct folio *)&precise) {
++      head = precise.compound_head;
++      if ((head & 1) == 0) {
++              foliop = (struct folio *)&precise;
+               idx = 0;
+               if (!folio_test_large(foliop))
+                       goto dump;
+               foliop = (struct folio *)page;
+       } else {
++              foliop = (struct folio *)(head - 1);
+               idx = folio_page_idx(foliop, page);
+       }
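
The open-coding relies on the stable compound_head encoding: a tail page
stores its head page's address with bit 0 set, while head and order-0
pages keep bit 0 clear. A hedged sketch of the decode step used above
(see include/linux/page-flags.h for the real helpers):

    unsigned long head = READ_ONCE(page->compound_head);
    struct folio *folio;

    if (head & 1)
            folio = (struct folio *)(head - 1);  /* tail: strip the tag */
    else
            folio = (struct folio *)page;        /* head or order-0 page */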
diff --git a/queue-6.12/mm-open-code-pagetail-in-folio_flags-and-const_folio_flags.patch b/queue-6.12/mm-open-code-pagetail-in-folio_flags-and-const_folio_flags.patch
new file mode 100644
index 0000000..44ccbff
--- /dev/null
@@ -0,0 +1,47 @@
+From 4de22b2a6a7477d84d9a01eb6b62a9117309d722 Mon Sep 17 00:00:00 2001
+From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Date: Mon, 25 Nov 2024 20:17:18 +0000
+Subject: mm: open-code PageTail in folio_flags() and const_folio_flags()
+
+From: Matthew Wilcox (Oracle) <willy@infradead.org>
+
+commit 4de22b2a6a7477d84d9a01eb6b62a9117309d722 upstream.
+
+It is unsafe to call PageTail() in dump_page() as page_is_fake_head() will
+almost certainly return true when called on a head page that is copied to
+the stack.  That will cause the VM_BUG_ON_PGFLAGS() in const_folio_flags()
+to trigger when it shouldn't.  Fortunately, we don't need to call
+PageTail() here; it's fine to have a pointer to a virtual alias of the
+page's flag word rather than the real page's flag word.
+
+Link: https://lkml.kernel.org/r/20241125201721.2963278-1-willy@infradead.org
+Fixes: fae7d834c43c ("mm: add __dump_folio()")
+Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Kees Cook <kees@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/page-flags.h |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/include/linux/page-flags.h
++++ b/include/linux/page-flags.h
+@@ -306,7 +306,7 @@ static const unsigned long *const_folio_
+ {
+       const struct page *page = &folio->page;
+-      VM_BUG_ON_PGFLAGS(PageTail(page), page);
++      VM_BUG_ON_PGFLAGS(page->compound_head & 1, page);
+       VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page);
+       return &page[n].flags;
+ }
+@@ -315,7 +315,7 @@ static unsigned long *folio_flags(struct
+ {
+       struct page *page = &folio->page;
+-      VM_BUG_ON_PGFLAGS(PageTail(page), page);
++      VM_BUG_ON_PGFLAGS(page->compound_head & 1, page);
+       VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page);
+       return &page[n].flags;
+ }
diff --git a/queue-6.12/mm-respect-mmap-hint-address-when-aligning-for-thp.patch b/queue-6.12/mm-respect-mmap-hint-address-when-aligning-for-thp.patch
new file mode 100644
index 0000000..5b80ec3
--- /dev/null
@@ -0,0 +1,111 @@
+From 249608ee47132cab3b1adacd9e463548f57bd316 Mon Sep 17 00:00:00 2001
+From: Kalesh Singh <kaleshsingh@google.com>
+Date: Mon, 18 Nov 2024 13:46:48 -0800
+Subject: mm: respect mmap hint address when aligning for THP
+
+From: Kalesh Singh <kaleshsingh@google.com>
+
+commit 249608ee47132cab3b1adacd9e463548f57bd316 upstream.
+
+Commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP
+boundaries") updated __get_unmapped_area() to align the start address for
+the VMA to a PMD boundary if CONFIG_TRANSPARENT_HUGEPAGE=y.
+
+It does this by effectively looking up a region of size
+request_size + PMD_SIZE and aligning the start up to a PMD boundary.
+
+Commit 4ef9ad19e176 ("mm: huge_memory: don't force huge page alignment on
+32 bit") opted out of this for 32bit due to regressions in mmap base
+randomization.
+
+Commit d4148aeab412 ("mm, mmap: limit THP alignment of anonymous mappings
+to PMD-aligned sizes") restricted this to mmap sizes that are multiples
+of PMD_SIZE, after regressions were reported in some performance
+benchmarks -- these seemed mostly caused by the reduced spatial locality
+of related mappings under the forced PMD alignment.
+
+Another unintended side effect has emerged: When a user specifies an mmap
+hint address, the THP alignment logic modifies the behavior, potentially
+ignoring the hint even if a sufficiently large gap exists at the requested
+hint location.
+
+Example Scenario:
+
+Consider the following simplified virtual address (VA) space:
+
+    ...
+
+    0x200000-0x400000 --- VMA A
+    0x400000-0x600000 --- Hole
+    0x600000-0x800000 --- VMA B
+
+    ...
+
+A call to mmap() with hint=0x400000 and len=0x200000 behaves differently:
+
+  - Before THP alignment: The requested region (size 0x200000) fits into
+    the gap at 0x400000, so the hint is respected.
+
+  - After alignment: The logic searches for a region of size
+    0x400000 (len + PMD_SIZE) starting at 0x400000.
+    This search fails due to the mapping at 0x600000 (VMA B), and the hint
+    is ignored, falling back to arch_get_unmapped_area[_topdown]().
+
+In general, the hint is effectively ignored if there is any existing
+mapping in the following range:
+
+     [mmap_hint + mmap_size, mmap_hint + mmap_size + PMD_SIZE)
+
+This changes the semantics of the mmap hint: from "Respect the hint if a
+sufficiently large gap exists at the requested location" to "Respect the
+hint only if an additional PMD-sized gap exists beyond the requested
+size".
+
+This has performance implications for allocators that allocate their heap
+using mmap but try to keep it "as contiguous as possible" by using the end
+of the existing heap as the address hint.  With the new behavior it's
+more likely to get a much less contiguous heap, adding extra fragmentation
+and performance overhead.
+
+To restore the expected behavior, don't use
+thp_get_unmapped_area_vmflags() for anonymous mappings when the user
+provided a hint address.
+
+Note: As Yang Shi pointed out: the issue still remains for filesystems
+which are using thp_get_unmapped_area() for their get_unmapped_area() op.
+It is unclear which workloads would regress if we ignore THP alignment
+when the hint address is provided for such file-backed mappings -- so this
+fix will be handled separately.
+
+Link: https://lkml.kernel.org/r/20241118214650.3667577-1-kaleshsingh@google.com
+Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries")
+Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
+Reviewed-by: Rik van Riel <riel@surriel.com>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Yang Shi <yang@os.amperecomputing.com>
+Cc: Rik van Riel <riel@surriel.com>
+Cc: Ryan Roberts <ryan.roberts@arm.com>
+Cc: Suren Baghdasaryan <surenb@google.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Hans Boehm <hboehm@google.com>
+Cc: Lokesh Gidra <lokeshgidra@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mmap.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -901,6 +901,7 @@ __get_unmapped_area(struct file *file, u
+       if (get_area) {
+               addr = get_area(file, addr, len, pgoff, flags);
+       } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)
++                 && !addr /* no hint */
+                  && IS_ALIGNED(len, PMD_SIZE)) {
+               /* Ensures that larger anonymous mappings are THP aligned. */
+               addr = thp_get_unmapped_area_vmflags(file, addr, len,
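
The restored contract can be checked from userspace: a non-MAP_FIXED
hint should be honored whenever a gap of the requested size exists at
that address. A small test sketch (the hint value is illustrative and
assumes it lands in a free range):

    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
            void *hint = (void *)0x40000000UL;  /* illustrative address */
            size_t len = 2UL << 20;             /* one PMD on x86-64 */
            void *p = mmap(hint, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            if (p == MAP_FAILED)
                    return 1;
            printf("requested %p, got %p (%s)\n", hint, p,
                   p == hint ? "hint respected" : "hint ignored");
            munmap(p, len);
            return 0;
    }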
diff --git a/queue-6.12/ocfs2-update-seq_file-index-in-ocfs2_dlm_seq_next.patch b/queue-6.12/ocfs2-update-seq_file-index-in-ocfs2_dlm_seq_next.patch
new file mode 100644
index 0000000..21abce5
--- /dev/null
@@ -0,0 +1,43 @@
+From 914eec5e980171bc128e7e24f7a22aa1d803570e Mon Sep 17 00:00:00 2001
+From: Wengang Wang <wen.gang.wang@oracle.com>
+Date: Tue, 19 Nov 2024 09:45:00 -0800
+Subject: ocfs2: update seq_file index in ocfs2_dlm_seq_next
+
+From: Wengang Wang <wen.gang.wang@oracle.com>
+
+commit 914eec5e980171bc128e7e24f7a22aa1d803570e upstream.
+
+The following INFO level message was seen:
+
+seq_file: buggy .next function ocfs2_dlm_seq_next [ocfs2] did not
+update position index
+
+Fix:
+Update *pos (and thus m->index) to make seq_read_iter() happy, even
+though the index itself is meaningless to ocfs2_dlm_seq_next().
+
+Link: https://lkml.kernel.org/r/20241119174500.9198-1-wen.gang.wang@oracle.com
+Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
+Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Cc: Changwei Ge <gechangwei@live.cn>
+Cc: Jun Piao <piaojun@huawei.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ocfs2/dlmglue.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/ocfs2/dlmglue.c
++++ b/fs/ocfs2/dlmglue.c
+@@ -3110,6 +3110,7 @@ static void *ocfs2_dlm_seq_next(struct s
+       struct ocfs2_lock_res *iter = v;
+       struct ocfs2_lock_res *dummy = &priv->p_iter_res;
++      (*pos)++;
+       spin_lock(&ocfs2_dlm_tracking_lock);
+       iter = ocfs2_dlm_next_res(iter, priv);
+       list_del_init(&dummy->l_debug_list);
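
The contract being enforced: seq_read_iter() warns when .next returns
without advancing m->index, so every .next implementation must bump
*pos even if its iteration state lives elsewhere. A minimal sketch
(hypothetical iterator):

    static void *example_seq_next(struct seq_file *m, void *v, loff_t *pos)
    {
            struct example_item *cur = v;

            (*pos)++;                   /* keep m->index advancing */
            return example_next(cur);   /* hypothetical list walker */
    }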
diff --git a/queue-6.12/sched-numa-fix-memory-leak-due-to-the-overwritten-vma-numab_state.patch b/queue-6.12/sched-numa-fix-memory-leak-due-to-the-overwritten-vma-numab_state.patch
new file mode 100644
index 0000000..72d08d9
--- /dev/null
@@ -0,0 +1,106 @@
+From 5f1b64e9a9b7ee9cfd32c6b2fab796e29bfed075 Mon Sep 17 00:00:00 2001
+From: Adrian Huang <ahuang12@lenovo.com>
+Date: Wed, 13 Nov 2024 18:21:46 +0800
+Subject: sched/numa: fix memory leak due to the overwritten vma->numab_state
+
+From: Adrian Huang <ahuang12@lenovo.com>
+
+commit 5f1b64e9a9b7ee9cfd32c6b2fab796e29bfed075 upstream.
+
+[Problem Description]
+When running the hackbench program of LTP, the following memory leak is
+reported by kmemleak.
+
+  # /opt/ltp/testcases/bin/hackbench 20 thread 1000
+  Running with 20*40 (== 800) tasks.
+
+  # dmesg | grep kmemleak
+  ...
+  kmemleak: 480 new suspected memory leaks (see /sys/kernel/debug/kmemleak)
+  kmemleak: 665 new suspected memory leaks (see /sys/kernel/debug/kmemleak)
+
+  # cat /sys/kernel/debug/kmemleak
+  unreferenced object 0xffff888cd8ca2c40 (size 64):
+    comm "hackbench", pid 17142, jiffies 4299780315
+    hex dump (first 32 bytes):
+      ac 74 49 00 01 00 00 00 4c 84 49 00 01 00 00 00  .tI.....L.I.....
+      00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+    backtrace (crc bff18fd4):
+      [<ffffffff81419a89>] __kmalloc_cache_noprof+0x2f9/0x3f0
+      [<ffffffff8113f715>] task_numa_work+0x725/0xa00
+      [<ffffffff8110f878>] task_work_run+0x58/0x90
+      [<ffffffff81ddd9f8>] syscall_exit_to_user_mode+0x1c8/0x1e0
+      [<ffffffff81dd78d5>] do_syscall_64+0x85/0x150
+      [<ffffffff81e0012b>] entry_SYSCALL_64_after_hwframe+0x76/0x7e
+  ...
+
+This issue can be consistently reproduced on three different servers:
+  * a 448-core server
+  * a 256-core server
+  * a 192-core server
+
+[Root Cause]
+Since multiple threads are created by the hackbench program (along with
+the command argument 'thread'), a shared vma might be accessed by two or
+more cores simultaneously. When two or more cores observe that
+vma->numab_state is NULL at the same time, vma->numab_state will be
+overwritten.
+
+Although current code ensures that only one thread scans the VMAs in a
+single 'numa_scan_period', there is a chance that another thread enters
+the next 'numa_scan_period' before we have reached the numab_state
+allocation [1].
+
+Note that the command `/opt/ltp/testcases/bin/hackbench 50 process 1000`
+cannot reproduce the issue. This was verified with 200+ test runs.
+
+[Solution]
+Use the cmpxchg atomic operation to ensure that only one thread executes
+the vma->numab_state assignment.
+
+[1] https://lore.kernel.org/lkml/1794be3c-358c-4cdc-a43d-a1f841d91ef7@amd.com/
+
+Link: https://lkml.kernel.org/r/20241113102146.2384-1-ahuang12@lenovo.com
+Fixes: ef6a22b70f6d ("sched/numa: apply the scan delay to every new vma")
+Signed-off-by: Adrian Huang <ahuang12@lenovo.com>
+Reported-by: Jiwei Sun <sunjw10@lenovo.com>
+Reviewed-by: Raghavendra K T <raghavendra.kt@amd.com>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Ben Segall <bsegall@google.com>
+Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Juri Lelli <juri.lelli@redhat.com>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Valentin Schneider <vschneid@redhat.com>
+Cc: Vincent Guittot <vincent.guittot@linaro.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/fair.c |   12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -3399,10 +3399,16 @@ retry_pids:
+               /* Initialise new per-VMA NUMAB state. */
+               if (!vma->numab_state) {
+-                      vma->numab_state = kzalloc(sizeof(struct vma_numab_state),
+-                              GFP_KERNEL);
+-                      if (!vma->numab_state)
++                      struct vma_numab_state *ptr;
++
++                      ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
++                      if (!ptr)
++                              continue;
++
++                      if (cmpxchg(&vma->numab_state, NULL, ptr)) {
++                              kfree(ptr);
+                               continue;
++                      }
+                       vma->numab_state->start_scan_seq = mm->numa_scan_seq;
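
The fix is an instance of the lockless init-once pattern: allocate a
private copy, publish it with cmpxchg(), and free it on losing the
race. A sketch of the pattern in isolation:

    struct vma_numab_state *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);

    if (!ptr)
            return;

    /* cmpxchg() returns the old value: non-NULL means another thread
     * already published its allocation, so ours must be freed. */
    if (cmpxchg(&vma->numab_state, NULL, ptr) != NULL)
            kfree(ptr);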
diff --git a/queue-6.12/scsi-ufs-pltfrm-drop-pm-runtime-reference-count-after-ufshcd_remove.patch b/queue-6.12/scsi-ufs-pltfrm-drop-pm-runtime-reference-count-after-ufshcd_remove.patch
index 4e8b324ca87ef8d542f62ed944295541ca4e1ab2..f59af0632bd9c362267afff4d60614e38ee698d2 100644
@@ -26,19 +26,17 @@ Reviewed-by: Bart Van Assche <bvanassche@acm.org>
 Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 ---
- drivers/ufs/host/tc-dwc-g210-pltfrm.c | 1 -
- drivers/ufs/host/ufs-exynos.c         | 1 -
- drivers/ufs/host/ufs-mediatek.c       | 1 -
- drivers/ufs/host/ufs-qcom.c           | 1 -
- drivers/ufs/host/ufs-sprd.c           | 1 -
- drivers/ufs/host/ufshcd-pltfrm.c      | 2 ++
+ drivers/ufs/host/tc-dwc-g210-pltfrm.c |    1 -
+ drivers/ufs/host/ufs-exynos.c         |    1 -
+ drivers/ufs/host/ufs-mediatek.c       |    1 -
+ drivers/ufs/host/ufs-qcom.c           |    1 -
+ drivers/ufs/host/ufs-sprd.c           |    1 -
+ drivers/ufs/host/ufshcd-pltfrm.c      |    2 ++
  6 files changed, 2 insertions(+), 5 deletions(-)
 
-diff --git a/drivers/ufs/host/tc-dwc-g210-pltfrm.c b/drivers/ufs/host/tc-dwc-g210-pltfrm.c
-index 113e0ef7b2cf..c6f8565ede21 100644
 --- a/drivers/ufs/host/tc-dwc-g210-pltfrm.c
 +++ b/drivers/ufs/host/tc-dwc-g210-pltfrm.c
-@@ -76,7 +76,6 @@ static int tc_dwc_g210_pltfm_probe(struct platform_device *pdev)
+@@ -76,7 +76,6 @@ static int tc_dwc_g210_pltfm_probe(struc
   */
  static void tc_dwc_g210_pltfm_remove(struct platform_device *pdev)
  {
@@ -46,11 +44,9 @@ index 113e0ef7b2cf..c6f8565ede21 100644
        ufshcd_pltfrm_remove(pdev);
  }
  
-diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c
-index b20f6526777a..9d4db13e142d 100644
 --- a/drivers/ufs/host/ufs-exynos.c
 +++ b/drivers/ufs/host/ufs-exynos.c
-@@ -1992,7 +1992,6 @@ static void exynos_ufs_remove(struct platform_device *pdev)
+@@ -1963,7 +1963,6 @@ static void exynos_ufs_remove(struct pla
        struct ufs_hba *hba =  platform_get_drvdata(pdev);
        struct exynos_ufs *ufs = ufshcd_get_variant(hba);
  
@@ -58,11 +54,9 @@ index b20f6526777a..9d4db13e142d 100644
        ufshcd_pltfrm_remove(pdev);
  
        phy_power_off(ufs->phy);
-diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c
-index b444146419de..ffe4d03a0f38 100644
 --- a/drivers/ufs/host/ufs-mediatek.c
 +++ b/drivers/ufs/host/ufs-mediatek.c
-@@ -1879,7 +1879,6 @@ static int ufs_mtk_probe(struct platform_device *pdev)
+@@ -1869,7 +1869,6 @@ out:
   */
  static void ufs_mtk_remove(struct platform_device *pdev)
  {
@@ -70,11 +64,9 @@ index b444146419de..ffe4d03a0f38 100644
        ufshcd_pltfrm_remove(pdev);
  }
  
-diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c
-index 3762337d7576..73b4fec8221a 100644
 --- a/drivers/ufs/host/ufs-qcom.c
 +++ b/drivers/ufs/host/ufs-qcom.c
-@@ -1863,7 +1863,6 @@ static void ufs_qcom_remove(struct platform_device *pdev)
+@@ -1845,7 +1845,6 @@ static void ufs_qcom_remove(struct platf
        struct ufs_hba *hba =  platform_get_drvdata(pdev);
        struct ufs_qcom_host *host = ufshcd_get_variant(hba);
  
@@ -82,11 +74,9 @@ index 3762337d7576..73b4fec8221a 100644
        ufshcd_pltfrm_remove(pdev);
        if (host->esi_enabled)
                platform_device_msi_free_irqs_all(hba->dev);
-diff --git a/drivers/ufs/host/ufs-sprd.c b/drivers/ufs/host/ufs-sprd.c
-index e455890cf7d4..d220978c2d8c 100644
 --- a/drivers/ufs/host/ufs-sprd.c
 +++ b/drivers/ufs/host/ufs-sprd.c
-@@ -427,7 +427,6 @@ static int ufs_sprd_probe(struct platform_device *pdev)
+@@ -427,7 +427,6 @@ static int ufs_sprd_probe(struct platfor
  
  static void ufs_sprd_remove(struct platform_device *pdev)
  {
@@ -94,11 +84,9 @@ index e455890cf7d4..d220978c2d8c 100644
        ufshcd_pltfrm_remove(pdev);
  }
  
-diff --git a/drivers/ufs/host/ufshcd-pltfrm.c b/drivers/ufs/host/ufshcd-pltfrm.c
-index bad5b1303eb6..b8dadd0a2f4c 100644
 --- a/drivers/ufs/host/ufshcd-pltfrm.c
 +++ b/drivers/ufs/host/ufshcd-pltfrm.c
-@@ -532,8 +532,10 @@ void ufshcd_pltfrm_remove(struct platform_device *pdev)
+@@ -532,8 +532,10 @@ void ufshcd_pltfrm_remove(struct platfor
  {
        struct ufs_hba *hba =  platform_get_drvdata(pdev);
  
@@ -109,6 +97,3 @@ index bad5b1303eb6..b8dadd0a2f4c 100644
  }
  EXPORT_SYMBOL_GPL(ufshcd_pltfrm_remove);
  
--- 
-2.47.1
-
diff --git a/queue-6.12/series b/queue-6.12/series
index 6769d92f6c1d79f1bb66be3cbbd08e7be95f9f89..04e9103c49b0f2bbe457d030875307b08252fcd3 100644
@@ -180,7 +180,6 @@ drm-amdkfd-add-mec-version-that-supports-no-pcie-atomics-for-gfx12.patch
 drm-amd-pm-fix-and-simplify-workload-handling.patch
 drm-dp_mst-verify-request-type-in-the-corresponding-down-message-reply.patch
 drm-dp_mst-fix-resetting-msg-rx-state-after-topology-removal.patch
-drm-amdgpu-rework-resume-handling-for-display-v2.patch
 drm-amd-display-correct-prefetch-calculation.patch
 drm-amd-display-limit-vtotal-range-to-max-hw-cap-minus-fp.patch
 drm-amd-display-add-a-left-edge-pixel-if-in-ycbcr422-or-ycbcr420-and-odm.patch
@@ -201,4 +200,19 @@ arch_numa-restore-nid-checks-before-registering-a-memblock-with-a-node.patch
 mmc-sdhci-pci-add-dmi-quirk-for-missing-cd-gpio-on-vexia-edu-atla-10-tablet.patch
 mmc-core-further-prevent-card-detect-during-shutdown.patch
 x86-cpu-add-lunar-lake-to-list-of-cpus-with-a-broken-monitor-implementation.patch
+ocfs2-update-seq_file-index-in-ocfs2_dlm_seq_next.patch
+stackdepot-fix-stack_depot_save_flags-in-nmi-context.patch
+lib-stackinit-hide-never-taken-branch-from-compiler.patch
+sched-numa-fix-memory-leak-due-to-the-overwritten-vma-numab_state.patch
+kasan-make-report_lock-a-raw-spinlock.patch
+mm-gup-handle-null-pages-in-unpin_user_pages.patch
+mm-mempolicy-fix-migrate_to_node-assuming-there-is-at-least-one-vma-in-a-mm.patch
+x86-cpu-topology-remove-limit-of-cpus-due-to-disabled-io-apic.patch
+x86-mm-add-_page_noptishadow-bit-to-avoid-updating-userspace-page-tables.patch
+mm-damon-fix-order-of-arguments-in-damos_before_apply-tracepoint.patch
+mm-memcg-declare-do_memsw_account-inline.patch
+mm-open-code-pagetail-in-folio_flags-and-const_folio_flags.patch
+mm-open-code-page_folio-in-dump_page.patch
+mm-fix-vrealloc-s-kasan-poisoning-logic.patch
+mm-respect-mmap-hint-address-when-aligning-for-thp.patch
 scsi-ufs-pltfrm-drop-pm-runtime-reference-count-after-ufshcd_remove.patch
diff --git a/queue-6.12/stackdepot-fix-stack_depot_save_flags-in-nmi-context.patch b/queue-6.12/stackdepot-fix-stack_depot_save_flags-in-nmi-context.patch
new file mode 100644
index 0000000..a0c4182
--- /dev/null
@@ -0,0 +1,87 @@
+From 031e04bdc834cda3b054ef6b698503b2b97e8186 Mon Sep 17 00:00:00 2001
+From: Marco Elver <elver@google.com>
+Date: Fri, 22 Nov 2024 16:39:47 +0100
+Subject: stackdepot: fix stack_depot_save_flags() in NMI context
+
+From: Marco Elver <elver@google.com>
+
+commit 031e04bdc834cda3b054ef6b698503b2b97e8186 upstream.
+
+Per documentation, stack_depot_save_flags() was meant to be usable from
+NMI context if STACK_DEPOT_FLAG_CAN_ALLOC is unset.  However, it would
+still try to take the pool_lock in an attempt to save a stack trace in the
+current pool (if space is available).
+
+This could result in deadlock if an NMI is handled while pool_lock is
+already held.  To avoid deadlock, only try to take the lock in NMI context
+and give up if unsuccessful.
+
+The documentation is fixed to clearly convey this.
+
+Link: https://lkml.kernel.org/r/Z0CcyfbPqmxJ9uJH@elver.google.com
+Link: https://lkml.kernel.org/r/20241122154051.3914732-1-elver@google.com
+Fixes: 4434a56ec209 ("stackdepot: make fast paths lock-less again")
+Signed-off-by: Marco Elver <elver@google.com>
+Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: Alexander Potapenko <glider@google.com>
+Cc: Andrey Konovalov <andreyknvl@gmail.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/stackdepot.h |    6 +++---
+ lib/stackdepot.c           |   10 +++++++++-
+ 2 files changed, 12 insertions(+), 4 deletions(-)
+
+--- a/include/linux/stackdepot.h
++++ b/include/linux/stackdepot.h
+@@ -147,7 +147,7 @@ static inline int stack_depot_early_init
+  * If the provided stack trace comes from the interrupt context, only the part
+  * up to the interrupt entry is saved.
+  *
+- * Context: Any context, but setting STACK_DEPOT_FLAG_CAN_ALLOC is required if
++ * Context: Any context, but unsetting STACK_DEPOT_FLAG_CAN_ALLOC is required if
+  *          alloc_pages() cannot be used from the current context. Currently
+  *          this is the case for contexts where neither %GFP_ATOMIC nor
+  *          %GFP_NOWAIT can be used (NMI, raw_spin_lock).
+@@ -156,7 +156,7 @@ static inline int stack_depot_early_init
+  */
+ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
+                                           unsigned int nr_entries,
+-                                          gfp_t gfp_flags,
++                                          gfp_t alloc_flags,
+                                           depot_flags_t depot_flags);
+ /**
+@@ -175,7 +175,7 @@ depot_stack_handle_t stack_depot_save_fl
+  * Return: Handle of the stack trace stored in depot, 0 on failure
+  */
+ depot_stack_handle_t stack_depot_save(unsigned long *entries,
+-                                    unsigned int nr_entries, gfp_t gfp_flags);
++                                    unsigned int nr_entries, gfp_t alloc_flags);
+ /**
+  * __stack_depot_get_stack_record - Get a pointer to a stack_record struct
+--- a/lib/stackdepot.c
++++ b/lib/stackdepot.c
+@@ -630,7 +630,15 @@ depot_stack_handle_t stack_depot_save_fl
+                       prealloc = page_address(page);
+       }
+-      raw_spin_lock_irqsave(&pool_lock, flags);
++      if (in_nmi()) {
++              /* We can never allocate in NMI context. */
++              WARN_ON_ONCE(can_alloc);
++              /* Best effort; bail if we fail to take the lock. */
++              if (!raw_spin_trylock_irqsave(&pool_lock, flags))
++                      goto exit;
++      } else {
++              raw_spin_lock_irqsave(&pool_lock, flags);
++      }
+       printk_deferred_enter();
+       /* Try to find again, to avoid concurrently inserting duplicates. */
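
The underlying rule: NMI code must never spin on a lock, because the
NMI may have interrupted the very CPU that already holds it. A sketch
of the resulting pattern, including the unlock side the hunk does not
show:

    unsigned long flags;

    if (in_nmi()) {
            /* Best effort only: bail out rather than deadlock. */
            if (!raw_spin_trylock_irqsave(&pool_lock, flags))
                    return 0;
    } else {
            raw_spin_lock_irqsave(&pool_lock, flags);
    }
    /* ... touch the shared pool ... */
    raw_spin_unlock_irqrestore(&pool_lock, flags);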
diff --git a/queue-6.12/x86-cpu-topology-remove-limit-of-cpus-due-to-disabled-io-apic.patch b/queue-6.12/x86-cpu-topology-remove-limit-of-cpus-due-to-disabled-io-apic.patch
new file mode 100644
index 0000000..d3b3552
--- /dev/null
@@ -0,0 +1,48 @@
+From 73da582a476ea6e3512f89f8ed57dfed945829a2 Mon Sep 17 00:00:00 2001
+From: Fernando Fernandez Mancera <ffmancera@riseup.net>
+Date: Mon, 2 Dec 2024 14:58:45 +0000
+Subject: x86/cpu/topology: Remove limit of CPUs due to disabled IO/APIC
+
+From: Fernando Fernandez Mancera <ffmancera@riseup.net>
+
+commit 73da582a476ea6e3512f89f8ed57dfed945829a2 upstream.
+
+The rework of possible CPU management erroneously disabled SMP when the
+IO/APIC is disabled either by the 'noapic' command line parameter or during
+IO/APIC setup. SMP is possible without IO/APIC.
+
+Remove the ioapic_is_disabled conditions from the relevant possible CPU
+management code paths to restore the original behaviour.
+
+Fixes: 7c0edad3643f ("x86/cpu/topology: Rework possible CPU management")
+Signed-off-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/20241202145905.1482-1-ffmancera@riseup.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/topology.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kernel/cpu/topology.c
++++ b/arch/x86/kernel/cpu/topology.c
+@@ -428,8 +428,8 @@ void __init topology_apply_cmdline_limit
+ {
+       unsigned int possible = nr_cpu_ids;
+-      /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' 'noapic' */
+-      if (!setup_max_cpus || ioapic_is_disabled || apic_is_disabled)
++      /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' */
++      if (!setup_max_cpus || apic_is_disabled)
+               possible = 1;
+       /* 'possible_cpus=N' */
+@@ -443,7 +443,7 @@ void __init topology_apply_cmdline_limit
+ static __init bool restrict_to_up(void)
+ {
+-      if (!smp_found_config || ioapic_is_disabled)
++      if (!smp_found_config)
+               return true;
+       /*
+        * XEN PV is special as it does not advertise the local APIC
diff --git a/queue-6.12/x86-mm-add-_page_noptishadow-bit-to-avoid-updating-userspace-page-tables.patch b/queue-6.12/x86-mm-add-_page_noptishadow-bit-to-avoid-updating-userspace-page-tables.patch
new file mode 100644
index 0000000..2593a86
--- /dev/null
@@ -0,0 +1,114 @@
+From d0ceea662d459726487030237689835fcc0483e5 Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Wed, 4 Dec 2024 11:27:14 +0000
+Subject: x86/mm: Add _PAGE_NOPTISHADOW bit to avoid updating userspace page tables
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit d0ceea662d459726487030237689835fcc0483e5 upstream.
+
+The set_p4d() and set_pgd() functions (in 4-level or 5-level page table setups
+respectively) assume that the root page table is actually a 8KiB allocation,
+with the userspace root immediately after the kernel root page table (so that
+the former can enforce NX on all the subordinate page tables, which are
+actually shared).
+
+However, users of the kernel_ident_mapping_init() code do not give it an 8KiB
+allocation for its PGD. Both swsusp_arch_resume() and acpi_mp_setup_reset()
+allocate only a single 4KiB page. The kexec code on x86_64 currently gets
+away with it purely by chance, because it allocates 8KiB for its "control
+code page" and then actually uses the first half for the PGD, then copies the
+actual trampoline code into the second half only after the identmap code has
+finished scribbling over it.
+
+Fix this by defining a _PAGE_NOPTISHADOW bit (which can use the same bit as
+_PAGE_SAVED_DIRTY since one is only for the PGD/P4D root and the other is
+exclusively for leaf PTEs). This instructs __pti_set_user_pgtbl() not to
+write to the userspace 'shadow' PGD.
+
+Strictly, the _PAGE_NOPTISHADOW bit doesn't need to be written out to the
+actual page tables; since __pti_set_user_pgtbl() returns the value to be
+written to the kernel page table, it could be filtered out. But there seems
+to be no benefit to actually doing so.
+
+Suggested-by: Dave Hansen <dave.hansen@intel.com>
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Link: https://lore.kernel.org/r/412c90a4df7aef077141d9f68d19cbe5602d6c6d.camel@infradead.org
+Cc: stable@kernel.org
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Rik van Riel <riel@surriel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/pgtable_types.h |    8 ++++++--
+ arch/x86/mm/ident_map.c              |    6 +++---
+ arch/x86/mm/pti.c                    |    2 +-
+ 3 files changed, 10 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -36,10 +36,12 @@
+ #define _PAGE_BIT_DEVMAP      _PAGE_BIT_SOFTW4
+ #ifdef CONFIG_X86_64
+-#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit */
++#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit (leaf) */
++#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW5 /* No PTI shadow (root PGD) */
+ #else
+ /* Shared with _PAGE_BIT_UFFD_WP which is not supported on 32 bit */
+-#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit */
++#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit (leaf) */
++#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW2 /* No PTI shadow (root PGD) */
+ #endif
+ /* If _PAGE_BIT_PRESENT is clear, we use these: */
+@@ -139,6 +141,8 @@
+ #define _PAGE_PROTNONE        (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
++#define _PAGE_NOPTISHADOW (_AT(pteval_t, 1) << _PAGE_BIT_NOPTISHADOW)
++
+ /*
+  * Set of bits not changed in pte_modify.  The pte's
+  * protection key is treated like _PAGE_RW, for
+--- a/arch/x86/mm/ident_map.c
++++ b/arch/x86/mm/ident_map.c
+@@ -174,7 +174,7 @@ static int ident_p4d_init(struct x86_map
+               if (result)
+                       return result;
+-              set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag));
++              set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag | _PAGE_NOPTISHADOW));
+       }
+       return 0;
+@@ -218,14 +218,14 @@ int kernel_ident_mapping_init(struct x86
+               if (result)
+                       return result;
+               if (pgtable_l5_enabled()) {
+-                      set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag));
++                      set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag | _PAGE_NOPTISHADOW));
+               } else {
+                       /*
+                        * With p4d folded, pgd is equal to p4d.
+                        * The pgd entry has to point to the pud page table in this case.
+                        */
+                       pud_t *pud = pud_offset(p4d, 0);
+-                      set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag));
++                      set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag | _PAGE_NOPTISHADOW));
+               }
+       }
+--- a/arch/x86/mm/pti.c
++++ b/arch/x86/mm/pti.c
+@@ -132,7 +132,7 @@ pgd_t __pti_set_user_pgtbl(pgd_t *pgdp,
+        * Top-level entries added to init_mm's usermode pgd after boot
+        * will not be automatically propagated to other mms.
+        */
+-      if (!pgdp_maps_userspace(pgdp))
++      if (!pgdp_maps_userspace(pgdp) || (pgd.pgd & _PAGE_NOPTISHADOW))
+               return pgd;
+       /*