From 5dd346ab05d1acab6e5e37bbaee7e24bb4e50a38 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 13 Jul 2025 16:52:52 +0200 Subject: [PATCH] 6.15-stable patches added patches: kallsyms-fix-build-without-execinfo.patch kasan-remove-kasan_find_vm_area-to-prevent-possible-deadlock.patch lib-alloc_tag-do-not-acquire-non-existent-lock-in-alloc_tag_top_users.patch maple_tree-fix-mt_destroy_walk-on-root-leaf-node.patch mm-damon-core-handle-damon_call_control-as-normal-under-kdmond-deactivation.patch mm-damon-fix-divide-by-zero-in-damon_get_intervals_score.patch mm-fix-the-inaccurate-memory-statistics-issue-for-users.patch mm-rmap-fix-potential-out-of-bounds-page-table-access-during-batched-unmap.patch mm-vmalloc-leave-lazy-mmu-mode-on-pte-mapping-error.patch samples-damon-fix-damon-sample-prcl-for-start-failure.patch samples-damon-fix-damon-sample-wsse-for-start-failure.patch scripts-gdb-de-reference-per-cpu-mce-interrupts.patch scripts-gdb-fix-interrupts-display-after-mcp-on-x86.patch scripts-gdb-fix-interrupts.py-after-maple-tree-conversion.patch scripts-gdb-vfs-support-external-dentry-names.patch --- .../kallsyms-fix-build-without-execinfo.patch | 48 +++ ...vm_area-to-prevent-possible-deadlock.patch | 110 +++++ ...existent-lock-in-alloc_tag_top_users.patch | 100 +++++ ...ix-mt_destroy_walk-on-root-leaf-node.patch | 40 ++ ...-as-normal-under-kdmond-deactivation.patch | 56 +++ ...by-zero-in-damon_get_intervals_score.patch | 43 ++ ...te-memory-statistics-issue-for-users.patch | 138 +++++++ ...ge-table-access-during-batched-unmap.patch | 140 +++++++ ...e-lazy-mmu-mode-on-pte-mapping-error.patch | 69 ++++ ...-damon-sample-prcl-for-start-failure.patch | 85 ++++ ...-damon-sample-wsse-for-start-failure.patch | 48 +++ ...-de-reference-per-cpu-mce-interrupts.patch | 45 ++ ...-interrupts-display-after-mcp-on-x86.patch | 35 ++ ...rupts.py-after-maple-tree-conversion.patch | 389 ++++++++++++++++++ ...db-vfs-support-external-dentry-names.patch | 47 +++ queue-6.15/series | 15 + 16 files changed, 1408 insertions(+) create mode 100644 queue-6.15/kallsyms-fix-build-without-execinfo.patch create mode 100644 queue-6.15/kasan-remove-kasan_find_vm_area-to-prevent-possible-deadlock.patch create mode 100644 queue-6.15/lib-alloc_tag-do-not-acquire-non-existent-lock-in-alloc_tag_top_users.patch create mode 100644 queue-6.15/maple_tree-fix-mt_destroy_walk-on-root-leaf-node.patch create mode 100644 queue-6.15/mm-damon-core-handle-damon_call_control-as-normal-under-kdmond-deactivation.patch create mode 100644 queue-6.15/mm-damon-fix-divide-by-zero-in-damon_get_intervals_score.patch create mode 100644 queue-6.15/mm-fix-the-inaccurate-memory-statistics-issue-for-users.patch create mode 100644 queue-6.15/mm-rmap-fix-potential-out-of-bounds-page-table-access-during-batched-unmap.patch create mode 100644 queue-6.15/mm-vmalloc-leave-lazy-mmu-mode-on-pte-mapping-error.patch create mode 100644 queue-6.15/samples-damon-fix-damon-sample-prcl-for-start-failure.patch create mode 100644 queue-6.15/samples-damon-fix-damon-sample-wsse-for-start-failure.patch create mode 100644 queue-6.15/scripts-gdb-de-reference-per-cpu-mce-interrupts.patch create mode 100644 queue-6.15/scripts-gdb-fix-interrupts-display-after-mcp-on-x86.patch create mode 100644 queue-6.15/scripts-gdb-fix-interrupts.py-after-maple-tree-conversion.patch create mode 100644 queue-6.15/scripts-gdb-vfs-support-external-dentry-names.patch diff --git a/queue-6.15/kallsyms-fix-build-without-execinfo.patch b/queue-6.15/kallsyms-fix-build-without-execinfo.patch new file 
mode 100644 index 0000000000..d297135334 --- /dev/null +++ b/queue-6.15/kallsyms-fix-build-without-execinfo.patch @@ -0,0 +1,48 @@ +From a95743b53031b015e8949e845a9f6fdfb2656347 Mon Sep 17 00:00:00 2001 +From: Achill Gilgenast +Date: Sun, 22 Jun 2025 03:45:49 +0200 +Subject: kallsyms: fix build without execinfo + +From: Achill Gilgenast + +commit a95743b53031b015e8949e845a9f6fdfb2656347 upstream. + +Some libcs, like musl libc, don't provide execinfo.h since it's not part of +POSIX. In order to fix compilation on musl, only include execinfo.h if +available (HAVE_BACKTRACE_SUPPORT). + +This was discovered with c104c16073b7 ("Kunit to check the longest symbol +length") which starts to include linux/kallsyms.h with Alpine Linux' +configs. + +Link: https://lkml.kernel.org/r/20250622014608.448718-1-fossdd@pwned.life +Fixes: c104c16073b7 ("Kunit to check the longest symbol length") +Signed-off-by: Achill Gilgenast +Cc: Luis Henriques +Cc: Greg Kroah-Hartman +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + tools/include/linux/kallsyms.h | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/tools/include/linux/kallsyms.h ++++ b/tools/include/linux/kallsyms.h +@@ -18,6 +18,7 @@ static inline const char *kallsyms_looku + return NULL; + } + ++#ifdef HAVE_BACKTRACE_SUPPORT + #include + #include + static inline void print_ip_sym(const char *loglvl, unsigned long ip) +@@ -30,5 +31,8 @@ static inline void print_ip_sym(const ch + + free(name); + } ++#else ++static inline void print_ip_sym(const char *loglvl, unsigned long ip) {} ++#endif + + #endif diff --git a/queue-6.15/kasan-remove-kasan_find_vm_area-to-prevent-possible-deadlock.patch b/queue-6.15/kasan-remove-kasan_find_vm_area-to-prevent-possible-deadlock.patch new file mode 100644 index 0000000000..bd21d68cd0 --- /dev/null +++ b/queue-6.15/kasan-remove-kasan_find_vm_area-to-prevent-possible-deadlock.patch @@ -0,0 +1,110 @@ +From 6ee9b3d84775944fb8c8a447961cd01274ac671c Mon Sep 17 00:00:00 2001 +From: Yeoreum Yun +Date: Thu, 3 Jul 2025 19:10:18 +0100 +Subject: kasan: remove kasan_find_vm_area() to prevent possible deadlock + +From: Yeoreum Yun + +commit 6ee9b3d84775944fb8c8a447961cd01274ac671c upstream. + +find_vm_area() cannot be called in atomic context. If find_vm_area() is +called to report vm area information, kasan can trigger a deadlock like: + +CPU0 CPU1 +vmalloc(); + alloc_vmap_area(); + spin_lock(&vn->busy.lock) + spin_lock_bh(&some_lock); + + + spin_lock(&some_lock); + + kasan_report(); + print_report(); + print_address_description(); + kasan_find_vm_area(); + find_vm_area(); + spin_lock(&vn->busy.lock) // deadlock! + +To prevent a possible deadlock while kasan reports, remove kasan_find_vm_area(). + +Link: https://lkml.kernel.org/r/20250703181018.580833-1-yeoreum.yun@arm.com +Fixes: c056a364e954 ("kasan: print virtual mapping info in reports") +Signed-off-by: Yeoreum Yun +Reported-by: Yunseong Kim +Reviewed-by: Andrey Ryabinin +Cc: Alexander Potapenko +Cc: Andrey Konovalov +Cc: Byungchul Park +Cc: Dmitriy Vyukov +Cc: Sebastian Andrzej Siewior +Cc: Steven Rostedt +Cc: Vincenzo Frascino +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/kasan/report.c | 45 ++------------------------------------------- + 1 file changed, 2 insertions(+), 43 deletions(-) + +--- a/mm/kasan/report.c ++++ b/mm/kasan/report.c +@@ -370,36 +370,6 @@ static inline bool init_task_stack_addr( + sizeof(init_thread_union.stack)); + } + +-/* +- * This function is invoked with report_lock (a raw_spinlock) held.
A +- * PREEMPT_RT kernel cannot call find_vm_area() as it will acquire a sleeping +- * rt_spinlock. +- * +- * For !RT kernel, the PROVE_RAW_LOCK_NESTING config option will print a +- * lockdep warning for this raw_spinlock -> spinlock dependency. This config +- * option is enabled by default to ensure better test coverage to expose this +- * kind of RT kernel problem. This lockdep splat, however, can be suppressed +- * by using DEFINE_WAIT_OVERRIDE_MAP() if it serves a useful purpose and the +- * invalid PREEMPT_RT case has been taken care of. +- */ +-static inline struct vm_struct *kasan_find_vm_area(void *addr) +-{ +- static DEFINE_WAIT_OVERRIDE_MAP(vmalloc_map, LD_WAIT_SLEEP); +- struct vm_struct *va; +- +- if (IS_ENABLED(CONFIG_PREEMPT_RT)) +- return NULL; +- +- /* +- * Suppress lockdep warning and fetch vmalloc area of the +- * offending address. +- */ +- lock_map_acquire_try(&vmalloc_map); +- va = find_vm_area(addr); +- lock_map_release(&vmalloc_map); +- return va; +-} +- + static void print_address_description(void *addr, u8 tag, + struct kasan_report_info *info) + { +@@ -429,19 +399,8 @@ static void print_address_description(vo + } + + if (is_vmalloc_addr(addr)) { +- struct vm_struct *va = kasan_find_vm_area(addr); +- +- if (va) { +- pr_err("The buggy address belongs to the virtual mapping at\n" +- " [%px, %px) created by:\n" +- " %pS\n", +- va->addr, va->addr + va->size, va->caller); +- pr_err("\n"); +- +- page = vmalloc_to_page(addr); +- } else { +- pr_err("The buggy address %px belongs to a vmalloc virtual mapping\n", addr); +- } ++ pr_err("The buggy address %px belongs to a vmalloc virtual mapping\n", addr); ++ page = vmalloc_to_page(addr); + } + + if (page) { diff --git a/queue-6.15/lib-alloc_tag-do-not-acquire-non-existent-lock-in-alloc_tag_top_users.patch b/queue-6.15/lib-alloc_tag-do-not-acquire-non-existent-lock-in-alloc_tag_top_users.patch new file mode 100644 index 0000000000..22a548b6a7 --- /dev/null +++ b/queue-6.15/lib-alloc_tag-do-not-acquire-non-existent-lock-in-alloc_tag_top_users.patch @@ -0,0 +1,100 @@ +From 99af22cd34688cc0d535a1919e0bea4cbc6c1ea1 Mon Sep 17 00:00:00 2001 +From: Harry Yoo +Date: Sat, 21 Jun 2025 04:53:05 +0900 +Subject: lib/alloc_tag: do not acquire non-existent lock in alloc_tag_top_users() + +From: Harry Yoo + +commit 99af22cd34688cc0d535a1919e0bea4cbc6c1ea1 upstream. + +alloc_tag_top_users() attempts to lock alloc_tag_cttype->mod_lock even +when the alloc_tag_cttype is not allocated because: + + 1) alloc tagging is disabled because mem profiling is disabled + (!alloc_tag_cttype) + 2) alloc tagging is enabled, but not yet initialized (!alloc_tag_cttype) + 3) alloc tagging is enabled, but failed initialization + (!alloc_tag_cttype or IS_ERR(alloc_tag_cttype)) + +In all cases, alloc_tag_cttype is not allocated, and therefore +alloc_tag_top_users() should not attempt to acquire the semaphore. 
+ +This leads to a crash on memory allocation failure by attempting to +acquire a non-existent semaphore: + + Oops: general protection fault, probably for non-canonical address 0xdffffc000000001b: 0000 [#3] SMP KASAN NOPTI + KASAN: null-ptr-deref in range [0x00000000000000d8-0x00000000000000df] + CPU: 2 UID: 0 PID: 1 Comm: systemd Tainted: G D 6.16.0-rc2 #1 VOLUNTARY + Tainted: [D]=DIE + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 + RIP: 0010:down_read_trylock+0xaa/0x3b0 + Code: d0 7c 08 84 d2 0f 85 a0 02 00 00 8b 0d df 31 dd 04 85 c9 75 29 48 b8 00 00 00 00 00 fc ff df 48 8d 6b 68 48 89 ea 48 c1 ea 03 <80> 3c 02 00 0f 85 88 02 00 00 48 3b 5b 68 0f 85 53 01 00 00 65 ff + RSP: 0000:ffff8881002ce9b8 EFLAGS: 00010016 + RAX: dffffc0000000000 RBX: 0000000000000070 RCX: 0000000000000000 + RDX: 000000000000001b RSI: 000000000000000a RDI: 0000000000000070 + RBP: 00000000000000d8 R08: 0000000000000001 R09: ffffed107dde49d1 + R10: ffff8883eef24e8b R11: ffff8881002cec20 R12: 1ffff11020059d37 + R13: 00000000003fff7b R14: ffff8881002cec20 R15: dffffc0000000000 + FS: 00007f963f21d940(0000) GS:ffff888458ca6000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007f963f5edf71 CR3: 000000010672c000 CR4: 0000000000350ef0 + Call Trace: + + codetag_trylock_module_list+0xd/0x20 + alloc_tag_top_users+0x369/0x4b0 + __show_mem+0x1cd/0x6e0 + warn_alloc+0x2b1/0x390 + __alloc_frozen_pages_noprof+0x12b9/0x21a0 + alloc_pages_mpol+0x135/0x3e0 + alloc_slab_page+0x82/0xe0 + new_slab+0x212/0x240 + ___slab_alloc+0x82a/0xe00 + + +As David Wang points out, this issue became easier to trigger after commit +780138b12381 ("alloc_tag: check mem_profiling_support in alloc_tag_init"). + +Before the commit, the issue occurred only when it failed to allocate and +initialize alloc_tag_cttype or if a memory allocation fails before +alloc_tag_init() is called. After the commit, it can be easily triggered +when memory profiling is compiled but disabled at boot. + +To properly determine whether alloc_tag_init() has been called and its +data structures initialized, verify that alloc_tag_cttype is a valid +pointer before acquiring the semaphore. If the variable is NULL or an +error value, it has not been properly initialized. In such a case, just +skip and do not attempt to acquire the semaphore. 
+ +[harry.yoo@oracle.com: v3] + Link: https://lkml.kernel.org/r/20250624072513.84219-1-harry.yoo@oracle.com +Link: https://lkml.kernel.org/r/20250620195305.1115151-1-harry.yoo@oracle.com +Fixes: 780138b12381 ("alloc_tag: check mem_profiling_support in alloc_tag_init") +Fixes: 1438d349d16b ("lib: add memory allocations report in show_mem()") +Signed-off-by: Harry Yoo +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-lkp/202506181351.bba867dd-lkp@intel.com +Acked-by: Suren Baghdasaryan +Tested-by: Raghavendra K T +Cc: Casey Chen +Cc: David Wang <00107082@163.com> +Cc: Kent Overstreet +Cc: Yuanyuan Zhong +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + lib/alloc_tag.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/lib/alloc_tag.c ++++ b/lib/alloc_tag.c +@@ -134,6 +134,9 @@ size_t alloc_tag_top_users(struct codeta + struct codetag_bytes n; + unsigned int i, nr = 0; + ++ if (IS_ERR_OR_NULL(alloc_tag_cttype)) ++ return 0; ++ + if (can_sleep) + codetag_lock_module_list(alloc_tag_cttype, true); + else if (!codetag_trylock_module_list(alloc_tag_cttype)) diff --git a/queue-6.15/maple_tree-fix-mt_destroy_walk-on-root-leaf-node.patch b/queue-6.15/maple_tree-fix-mt_destroy_walk-on-root-leaf-node.patch new file mode 100644 index 0000000000..7e1066d6e3 --- /dev/null +++ b/queue-6.15/maple_tree-fix-mt_destroy_walk-on-root-leaf-node.patch @@ -0,0 +1,40 @@ +From ea9b77f98d94c4d5c1bd1ac1db078f78b40e8bf5 Mon Sep 17 00:00:00 2001 +From: Wei Yang +Date: Tue, 24 Jun 2025 15:18:40 -0400 +Subject: maple_tree: fix mt_destroy_walk() on root leaf node + +From: Wei Yang + +commit ea9b77f98d94c4d5c1bd1ac1db078f78b40e8bf5 upstream. + +On destroy, we should set each node dead. But the current code misses this when +the maple tree has only the root node. + +The reason is that mt_destroy_walk() leverages mte_destroy_descend() to set the node +dead, but this is skipped since the only root node is a leaf. + +Fix this by setting the node dead if it is a leaf. + +Link: https://lore.kernel.org/all/20250407231354.11771-1-richard.weiyang@gmail.com/ +Link: https://lkml.kernel.org/r/20250624191841.64682-1-Liam.Howlett@oracle.com +Fixes: 54a611b60590 ("Maple Tree: add new data structure") +Signed-off-by: Wei Yang +Signed-off-by: Liam R. Howlett +Reviewed-by: Dev Jain +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + lib/maple_tree.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/lib/maple_tree.c ++++ b/lib/maple_tree.c +@@ -5288,6 +5288,7 @@ static void mt_destroy_walk(struct maple + struct maple_enode *start; + + if (mte_is_leaf(enode)) { ++ mte_set_node_dead(enode); + node->type = mte_node_type(enode); + goto free_leaf; + } diff --git a/queue-6.15/mm-damon-core-handle-damon_call_control-as-normal-under-kdmond-deactivation.patch b/queue-6.15/mm-damon-core-handle-damon_call_control-as-normal-under-kdmond-deactivation.patch new file mode 100644 index 0000000000..2e41e780d9 --- /dev/null +++ b/queue-6.15/mm-damon-core-handle-damon_call_control-as-normal-under-kdmond-deactivation.patch @@ -0,0 +1,56 @@ +From bb1b5929b4279b136816f95ce1e8f1fa689bf4a1 Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Sun, 29 Jun 2025 13:49:14 -0700 +Subject: mm/damon/core: handle damon_call_control as normal under kdmond deactivation + +From: SeongJae Park + +commit bb1b5929b4279b136816f95ce1e8f1fa689bf4a1 upstream. + +The DAMON sysfs interface internally uses damon_call() to update DAMON +parameters as users requested, online.
However, DAMON core cancels any +damon_call() requests when it is deactivated by DAMOS watermarks. + +As a result, users cannot change DAMON parameters online while DAMON is +deactivated. Note that users can turn DAMON off and on with different +watermarks as a workaround. Since deactivated DAMON is nearly the same as +stopped DAMON, the workaround should cause no big problem. Anyway, a bug +is a bug. + +There is no good reason to cancel the damon_call() request under +DAMOS deactivation. Fix it by simply handling the request as normal, +rather than cancelling it, in this situation. + +Link: https://lkml.kernel.org/r/20250629204914.54114-1-sj@kernel.org +Fixes: 42b7491af14c ("mm/damon/core: introduce damon_call()") +Signed-off-by: SeongJae Park +Cc: [6.14+] +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/core.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/mm/damon/core.c ++++ b/mm/damon/core.c +@@ -2306,9 +2306,8 @@ static void kdamond_usleep(unsigned long + * + * If there is a &struct damon_call_control request that registered via + * &damon_call() on @ctx, do or cancel the invocation of the function depending +- * on @cancel. @cancel is set when the kdamond is deactivated by DAMOS +- * watermarks, or the kdamond is already out of the main loop and therefore +- * will be terminated. ++ * on @cancel. @cancel is set when the kdamond is already out of the main loop ++ * and therefore will be terminated. + */ + static void kdamond_call(struct damon_ctx *ctx, bool cancel) + { +@@ -2356,7 +2355,7 @@ static int kdamond_wait_activation(struc + if (ctx->callback.after_wmarks_check && + ctx->callback.after_wmarks_check(ctx)) + break; +- kdamond_call(ctx, true); ++ kdamond_call(ctx, false); + damos_walk_cancel(ctx); + } + return -EBUSY; diff --git a/queue-6.15/mm-damon-fix-divide-by-zero-in-damon_get_intervals_score.patch b/queue-6.15/mm-damon-fix-divide-by-zero-in-damon_get_intervals_score.patch new file mode 100644 index 0000000000..ebbdde825d --- /dev/null +++ b/queue-6.15/mm-damon-fix-divide-by-zero-in-damon_get_intervals_score.patch @@ -0,0 +1,43 @@ +From bd225b9591442065beb876da72656f4a2d627d03 Mon Sep 17 00:00:00 2001 +From: Honggyu Kim +Date: Wed, 2 Jul 2025 09:02:04 +0900 +Subject: mm/damon: fix divide by zero in damon_get_intervals_score() + +From: Honggyu Kim + +commit bd225b9591442065beb876da72656f4a2d627d03 upstream. + +The current implementation allows zero-size regions for no special +reason, but damon_get_intervals_score() crashes with a divide-by-zero +when the region size is zero. + + [ 29.403950] Oops: divide error: 0000 [#1] SMP NOPTI + +This patch fixes the bug, but does not disallow zero-size regions to keep +backward compatibility, since disallowing zero-size regions might be a +breaking change for some users. + +In addition, the same crash can happen when intervals_goal.access_bp is +zero, so this should be fixed in stable trees as well. + +Link: https://lkml.kernel.org/r/20250702000205.1921-5-honggyu.kim@sk.com +Fixes: f04b0fedbe71 ("mm/damon/core: implement intervals auto-tuning") +Signed-off-by: Honggyu Kim +Reviewed-by: SeongJae Park +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/core.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/mm/damon/core.c ++++ b/mm/damon/core.c +@@ -1427,6 +1427,7 @@ static unsigned long damon_get_intervals + } + } + target_access_events = max_access_events * goal_bp / 10000; ++ target_access_events = target_access_events ?
: 1; + return access_events * 10000 / target_access_events; + } + diff --git a/queue-6.15/mm-fix-the-inaccurate-memory-statistics-issue-for-users.patch b/queue-6.15/mm-fix-the-inaccurate-memory-statistics-issue-for-users.patch new file mode 100644 index 0000000000..e5e53653bf --- /dev/null +++ b/queue-6.15/mm-fix-the-inaccurate-memory-statistics-issue-for-users.patch @@ -0,0 +1,138 @@ +From 82241a83cd15aaaf28200a40ad1a8b480012edaf Mon Sep 17 00:00:00 2001 +From: Baolin Wang +Date: Thu, 5 Jun 2025 20:58:29 +0800 +Subject: mm: fix the inaccurate memory statistics issue for users + +From: Baolin Wang + +commit 82241a83cd15aaaf28200a40ad1a8b480012edaf upstream. + +On some large machines with a high number of CPUs running a 64K pagesize +kernel, we found that the 'RES' field displayed by the top command is +always 0 for some processes, which causes a lot of confusion for users. + + PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND + 875525 root 20 0 12480 0 0 R 0.3 0.0 0:00.08 top + 1 root 20 0 172800 0 0 S 0.0 0.0 0:04.52 systemd + +The main reason is that the batch size of the percpu counter is quite +large on these machines, caching a significant percpu value, since mm's +rss stats were converted into percpu_counter by commit f1a7941243c1 ("mm: +convert mm's rss stats into percpu_counter"). Intuitively, the batch +number should be optimized, but on some paths, performance may take +precedence over statistical accuracy. Therefore, introduce a new +interface that adds up the percpu statistical counts and displays the +result to users, which removes the confusion. In addition, this change is +not expected to be on a performance-critical path, so the modification +should be acceptable. + +In addition, the 'mm->rss_stat' is updated by using add_mm_counter() and +dec/inc_mm_counter(), which are all wrappers around +percpu_counter_add_batch(). In percpu_counter_add_batch(), there is +percpu batch caching to avoid 'fbc->lock' contention. This patch changes +task_mem() and task_statm() to get the accurate mm counters under the +'fbc->lock', but this should not exacerbate kernel 'mm->rss_stat' lock +contention due to the percpu batch caching of the mm counters. The +following test also confirms the theoretical analysis. + +I ran stress-ng stressing anon page faults in 32 threads on my 32-core +machine, while simultaneously running a script that starts 32 +threads to busy-loop pread each stress-ng thread's /proc/pid/status +interface. From the following data, I did not observe any obvious impact +of this patch on the stress-ng tests. + +w/o patch: +stress-ng: info: [6848] 4,399,219,085,152 CPU Cycles 67.327 B/sec +stress-ng: info: [6848] 1,616,524,844,832 Instructions 24.740 B/sec (0.367 instr. per cycle) +stress-ng: info: [6848] 39,529,792 Page Faults Total 0.605 M/sec +stress-ng: info: [6848] 39,529,792 Page Faults Minor 0.605 M/sec + +w/patch: +stress-ng: info: [2485] 4,462,440,381,856 CPU Cycles 68.382 B/sec +stress-ng: info: [2485] 1,615,101,503,296 Instructions 24.750 B/sec (0.362 instr. per cycle) +stress-ng: info: [2485] 39,439,232 Page Faults Total 0.604 M/sec +stress-ng: info: [2485] 39,439,232 Page Faults Minor 0.604 M/sec + +On comparing a very simple app which just allocates & touches some +memory against v6.1 (which doesn't have f1a7941243c1) and latest Linus +tree (4c06e63b9203), I can see that on the latest Linus tree the values for +VmRSS, RssAnon and RssFile from /proc/self/status are all zeroes while +they do report values on v6.1 and a Linus tree with this patch.
+ +Link: https://lkml.kernel.org/r/f4586b17f66f97c174f7fd1f8647374fdb53de1c.1749119050.git.baolin.wang@linux.alibaba.com +Fixes: f1a7941243c1 ("mm: convert mm's rss stats into percpu_counter") +Signed-off-by: Baolin Wang +Reviewed-by: Aboorva Devarajan +Tested-by: Aboorva Devarajan +Tested-by Donet Tom +Acked-by: Shakeel Butt +Acked-by: SeongJae Park +Acked-by: Michal Hocko +Reviewed-by: Vlastimil Babka +Cc: David Hildenbrand +Cc: Liam Howlett +Cc: Lorenzo Stoakes +Cc: Mike Rapoport +Cc: Suren Baghdasaryan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/proc/task_mmu.c | 14 +++++++------- + include/linux/mm.h | 5 +++++ + 2 files changed, 12 insertions(+), 7 deletions(-) + +--- a/fs/proc/task_mmu.c ++++ b/fs/proc/task_mmu.c +@@ -36,9 +36,9 @@ void task_mem(struct seq_file *m, struct + unsigned long text, lib, swap, anon, file, shmem; + unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; + +- anon = get_mm_counter(mm, MM_ANONPAGES); +- file = get_mm_counter(mm, MM_FILEPAGES); +- shmem = get_mm_counter(mm, MM_SHMEMPAGES); ++ anon = get_mm_counter_sum(mm, MM_ANONPAGES); ++ file = get_mm_counter_sum(mm, MM_FILEPAGES); ++ shmem = get_mm_counter_sum(mm, MM_SHMEMPAGES); + + /* + * Note: to minimize their overhead, mm maintains hiwater_vm and +@@ -59,7 +59,7 @@ void task_mem(struct seq_file *m, struct + text = min(text, mm->exec_vm << PAGE_SHIFT); + lib = (mm->exec_vm << PAGE_SHIFT) - text; + +- swap = get_mm_counter(mm, MM_SWAPENTS); ++ swap = get_mm_counter_sum(mm, MM_SWAPENTS); + SEQ_PUT_DEC("VmPeak:\t", hiwater_vm); + SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm); + SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm); +@@ -92,12 +92,12 @@ unsigned long task_statm(struct mm_struc + unsigned long *shared, unsigned long *text, + unsigned long *data, unsigned long *resident) + { +- *shared = get_mm_counter(mm, MM_FILEPAGES) + +- get_mm_counter(mm, MM_SHMEMPAGES); ++ *shared = get_mm_counter_sum(mm, MM_FILEPAGES) + ++ get_mm_counter_sum(mm, MM_SHMEMPAGES); + *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) + >> PAGE_SHIFT; + *data = mm->data_vm + mm->stack_vm; +- *resident = *shared + get_mm_counter(mm, MM_ANONPAGES); ++ *resident = *shared + get_mm_counter_sum(mm, MM_ANONPAGES); + return mm->total_vm; + } + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -2708,6 +2708,11 @@ static inline unsigned long get_mm_count + return percpu_counter_read_positive(&mm->rss_stat[member]); + } + ++static inline unsigned long get_mm_counter_sum(struct mm_struct *mm, int member) ++{ ++ return percpu_counter_sum_positive(&mm->rss_stat[member]); ++} ++ + void mm_trace_rss_stat(struct mm_struct *mm, int member); + + static inline void add_mm_counter(struct mm_struct *mm, int member, long value) diff --git a/queue-6.15/mm-rmap-fix-potential-out-of-bounds-page-table-access-during-batched-unmap.patch b/queue-6.15/mm-rmap-fix-potential-out-of-bounds-page-table-access-during-batched-unmap.patch new file mode 100644 index 0000000000..b5a5bb202e --- /dev/null +++ b/queue-6.15/mm-rmap-fix-potential-out-of-bounds-page-table-access-during-batched-unmap.patch @@ -0,0 +1,140 @@ +From ddd05742b45b083975a0855ef6ebbf88cf1f532a Mon Sep 17 00:00:00 2001 +From: Lance Yang +Date: Fri, 27 Jun 2025 14:23:19 +0800 +Subject: mm/rmap: fix potential out-of-bounds page table access during batched unmap + +From: Lance Yang + +commit ddd05742b45b083975a0855ef6ebbf88cf1f532a upstream. 
+ +As pointed out by David[1], the batched unmap logic in +try_to_unmap_one() may read past the end of a PTE table when a large +folio's PTE mappings are not fully contained within a single page +table. + +While this scenario might be rare, an issue triggerable from userspace +must be fixed regardless of its likelihood. This patch fixes the +out-of-bounds access by refactoring the logic into a new helper, +folio_unmap_pte_batch(). + +The new helper correctly calculates the safe batch size by capping the +scan at both the VMA and PMD boundaries. To simplify the code, it also +supports partial batching (i.e., any number of pages from 1 up to the +calculated safe maximum), as there is no strong reason to special-case +for fully mapped folios. + +Link: https://lkml.kernel.org/r/20250701143100.6970-1-lance.yang@linux.dev +Link: https://lkml.kernel.org/r/20250630011305.23754-1-lance.yang@linux.dev +Link: https://lkml.kernel.org/r/20250627062319.84936-1-lance.yang@linux.dev +Link: https://lore.kernel.org/linux-mm/a694398c-9f03-4737-81b9-7e49c857fcbe@redhat.com [1] +Fixes: 354dffd29575 ("mm: support batched unmap for lazyfree large folios during reclamation") +Signed-off-by: Lance Yang +Suggested-by: David Hildenbrand +Reported-by: David Hildenbrand +Closes: https://lore.kernel.org/linux-mm/a694398c-9f03-4737-81b9-7e49c857fcbe@redhat.com +Suggested-by: Barry Song +Acked-by: Barry Song +Reviewed-by: Lorenzo Stoakes +Acked-by: David Hildenbrand +Reviewed-by: Harry Yoo +Cc: Baolin Wang +Cc: Chris Li +Cc: "Huang, Ying" +Cc: Kairui Song +Cc: Lance Yang +Cc: Liam Howlett +Cc: Mingzhe Yang +Cc: Rik van Riel +Cc: Ryan Roberts +Cc: Tangquan Zheng +Cc: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/rmap.c | 46 ++++++++++++++++++++++++++++------------------ + 1 file changed, 28 insertions(+), 18 deletions(-) + +diff --git a/mm/rmap.c b/mm/rmap.c +index fb63d9256f09..1320b88fab74 100644 +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -1845,23 +1845,32 @@ void folio_remove_rmap_pud(struct folio *folio, struct page *page, + #endif + } + +-/* We support batch unmapping of PTEs for lazyfree large folios */ +-static inline bool can_batch_unmap_folio_ptes(unsigned long addr, +- struct folio *folio, pte_t *ptep) ++static inline unsigned int folio_unmap_pte_batch(struct folio *folio, ++ struct page_vma_mapped_walk *pvmw, ++ enum ttu_flags flags, pte_t pte) + { + const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY; +- int max_nr = folio_nr_pages(folio); +- pte_t pte = ptep_get(ptep); ++ unsigned long end_addr, addr = pvmw->address; ++ struct vm_area_struct *vma = pvmw->vma; ++ unsigned int max_nr; + ++ if (flags & TTU_HWPOISON) ++ return 1; ++ if (!folio_test_large(folio)) ++ return 1; ++ ++ /* We may only batch within a single VMA and a single page table. */ ++ end_addr = pmd_addr_end(addr, vma->vm_end); ++ max_nr = (end_addr - addr) >> PAGE_SHIFT; ++ ++ /* We only support lazyfree batching for now ... 
*/ + if (!folio_test_anon(folio) || folio_test_swapbacked(folio)) +- return false; ++ return 1; + if (pte_unused(pte)) +- return false; +- if (pte_pfn(pte) != folio_pfn(folio)) +- return false; ++ return 1; + +- return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL, +- NULL, NULL) == max_nr; ++ return folio_pte_batch(folio, addr, pvmw->pte, pte, max_nr, fpb_flags, ++ NULL, NULL, NULL); + } + + /* +@@ -2024,9 +2033,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, + if (pte_dirty(pteval)) + folio_mark_dirty(folio); + } else if (likely(pte_present(pteval))) { +- if (folio_test_large(folio) && !(flags & TTU_HWPOISON) && +- can_batch_unmap_folio_ptes(address, folio, pvmw.pte)) +- nr_pages = folio_nr_pages(folio); ++ nr_pages = folio_unmap_pte_batch(folio, &pvmw, flags, pteval); + end_addr = address + nr_pages * PAGE_SIZE; + flush_cache_range(vma, address, end_addr); + +@@ -2206,13 +2213,16 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, + hugetlb_remove_rmap(folio); + } else { + folio_remove_rmap_ptes(folio, subpage, nr_pages, vma); +- folio_ref_sub(folio, nr_pages - 1); + } + if (vma->vm_flags & VM_LOCKED) + mlock_drain_local(); +- folio_put(folio); +- /* We have already batched the entire folio */ +- if (nr_pages > 1) ++ folio_put_refs(folio, nr_pages); ++ ++ /* ++ * If we are sure that we batched the entire folio and cleared ++ * all PTEs, we can just optimize and stop right here. ++ */ ++ if (nr_pages == folio_nr_pages(folio)) + goto walk_done; + continue; + walk_abort: +-- +2.50.1 + diff --git a/queue-6.15/mm-vmalloc-leave-lazy-mmu-mode-on-pte-mapping-error.patch b/queue-6.15/mm-vmalloc-leave-lazy-mmu-mode-on-pte-mapping-error.patch new file mode 100644 index 0000000000..005b2a940f --- /dev/null +++ b/queue-6.15/mm-vmalloc-leave-lazy-mmu-mode-on-pte-mapping-error.patch @@ -0,0 +1,69 @@ +From fea18c686320a53fce7ad62a87a3e1d10ad02f31 Mon Sep 17 00:00:00 2001 +From: Alexander Gordeev +Date: Mon, 23 Jun 2025 09:57:21 +0200 +Subject: mm/vmalloc: leave lazy MMU mode on PTE mapping error + +From: Alexander Gordeev + +commit fea18c686320a53fce7ad62a87a3e1d10ad02f31 upstream. + +vmap_pages_pte_range() enters the lazy MMU mode, but fails to leave it in +case an error is encountered. 
+ +Link: https://lkml.kernel.org/r/20250623075721.2817094-1-agordeev@linux.ibm.com +Fixes: 2ba3e6947aed ("mm/vmalloc: track which page-table levels were modified") +Signed-off-by: Alexander Gordeev +Reported-by: kernel test robot +Reported-by: Dan Carpenter +Closes: https://lore.kernel.org/r/202506132017.T1l1l6ME-lkp@intel.com/ +Reviewed-by: Ryan Roberts +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/vmalloc.c | 22 +++++++++++++++------- + 1 file changed, 15 insertions(+), 7 deletions(-) + +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -487,6 +487,7 @@ static int vmap_pages_pte_range(pmd_t *p + unsigned long end, pgprot_t prot, struct page **pages, int *nr, + pgtbl_mod_mask *mask) + { ++ int err = 0; + pte_t *pte; + + /* +@@ -500,18 +501,25 @@ static int vmap_pages_pte_range(pmd_t *p + do { + struct page *page = pages[*nr]; + +- if (WARN_ON(!pte_none(ptep_get(pte)))) +- return -EBUSY; +- if (WARN_ON(!page)) +- return -ENOMEM; +- if (WARN_ON(!pfn_valid(page_to_pfn(page)))) +- return -EINVAL; ++ if (WARN_ON(!pte_none(ptep_get(pte)))) { ++ err = -EBUSY; ++ break; ++ } ++ if (WARN_ON(!page)) { ++ err = -ENOMEM; ++ break; ++ } ++ if (WARN_ON(!pfn_valid(page_to_pfn(page)))) { ++ err = -EINVAL; ++ break; ++ } + + set_pte_at(&init_mm, addr, pte, mk_pte(page, prot)); + (*nr)++; + } while (pte++, addr += PAGE_SIZE, addr != end); + *mask |= PGTBL_PTE_MODIFIED; +- return 0; ++ ++ return err; + } + + static int vmap_pages_pmd_range(pud_t *pud, unsigned long addr, diff --git a/queue-6.15/samples-damon-fix-damon-sample-prcl-for-start-failure.patch b/queue-6.15/samples-damon-fix-damon-sample-prcl-for-start-failure.patch new file mode 100644 index 0000000000..e3b9c228c0 --- /dev/null +++ b/queue-6.15/samples-damon-fix-damon-sample-prcl-for-start-failure.patch @@ -0,0 +1,85 @@ +From d9e01c62b7a0c258a7481c083f84c766a8f5597c Mon Sep 17 00:00:00 2001 +From: Honggyu Kim +Date: Wed, 2 Jul 2025 09:02:01 +0900 +Subject: samples/damon: fix damon sample prcl for start failure + +From: Honggyu Kim + +commit d9e01c62b7a0c258a7481c083f84c766a8f5597c upstream. + +Patch series "mm/damon: fix divide by zero and its samples", v3. + +This series includes fixes against damon and its samples to make it safer +when damon sample starting fails. + +It includes the following changes. +- fix unexpected divide by zero crash for zero size regions +- fix bugs for damon samples in case of start failures + + +This patch (of 4): + +The damon_sample_prcl_start() can fail so we must reset the "enable" +parameter to "false" again for proper rollback. + +In such cases, setting Y to "enable" then N triggers the following crash +because damon sample start failed but the "enable" stays as Y. + + [ 2441.419649] damon_sample_prcl: start + [ 2454.146817] damon_sample_prcl: stop + [ 2454.146862] ------------[ cut here ]------------ + [ 2454.146865] kernel BUG at mm/slub.c:546! + [ 2454.148183] Oops: invalid opcode: 0000 [#1] SMP NOPTI + ... + [ 2454.167555] Call Trace: + [ 2454.167822] + [ 2454.168061] damon_destroy_ctx+0x78/0x140 + [ 2454.168454] damon_sample_prcl_enable_store+0x8d/0xd0 + [ 2454.168932] param_attr_store+0xa1/0x120 + [ 2454.169315] module_attr_store+0x20/0x50 + [ 2454.169695] sysfs_kf_write+0x72/0x90 + [ 2454.170065] kernfs_fop_write_iter+0x150/0x1e0 + [ 2454.170491] vfs_write+0x315/0x440 + [ 2454.170833] ksys_write+0x69/0xf0 + [ 2454.171162] __x64_sys_write+0x19/0x30 + [ 2454.171525] x64_sys_call+0x18b2/0x2700 + [ 2454.171900] do_syscall_64+0x7f/0x680 + [ 2454.172258] ? 
exit_to_user_mode_loop+0xf6/0x180 + [ 2454.172694] ? clear_bhb_loop+0x30/0x80 + [ 2454.173067] ? clear_bhb_loop+0x30/0x80 + [ 2454.173439] entry_SYSCALL_64_after_hwframe+0x76/0x7e + +Link: https://lkml.kernel.org/r/20250702000205.1921-1-honggyu.kim@sk.com +Link: https://lkml.kernel.org/r/20250702000205.1921-2-honggyu.kim@sk.com +Fixes: 2aca254620a8 ("samples/damon: introduce a skeleton of a smaple DAMON module for proactive reclamation") +Signed-off-by: Honggyu Kim +Reviewed-by: SeongJae Park +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + samples/damon/prcl.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/samples/damon/prcl.c b/samples/damon/prcl.c +index 056b1b21a0fe..5597e6a08ab2 100644 +--- a/samples/damon/prcl.c ++++ b/samples/damon/prcl.c +@@ -122,8 +122,12 @@ static int damon_sample_prcl_enable_store( + if (enable == enabled) + return 0; + +- if (enable) +- return damon_sample_prcl_start(); ++ if (enable) { ++ err = damon_sample_prcl_start(); ++ if (err) ++ enable = false; ++ return err; ++ } + damon_sample_prcl_stop(); + return 0; + } +-- +2.50.1 + diff --git a/queue-6.15/samples-damon-fix-damon-sample-wsse-for-start-failure.patch b/queue-6.15/samples-damon-fix-damon-sample-wsse-for-start-failure.patch new file mode 100644 index 0000000000..6f70cec257 --- /dev/null +++ b/queue-6.15/samples-damon-fix-damon-sample-wsse-for-start-failure.patch @@ -0,0 +1,48 @@ +From f1221c8442616a6927aff836327777144545cb29 Mon Sep 17 00:00:00 2001 +From: Honggyu Kim +Date: Wed, 2 Jul 2025 09:02:02 +0900 +Subject: samples/damon: fix damon sample wsse for start failure + +From: Honggyu Kim + +commit f1221c8442616a6927aff836327777144545cb29 upstream. + +The damon_sample_wsse_start() can fail so we must reset the "enable" +parameter to "false" again for proper rollback. + +In such cases, setting Y to "enable" then N triggers the similar crash +with wsse because damon sample start failed but the "enable" stays as Y. + +Link: https://lkml.kernel.org/r/20250702000205.1921-3-honggyu.kim@sk.com +Fixes: b757c6cfc696 ("samples/damon/wsse: start and stop DAMON as the user requests") +Signed-off-by: Honggyu Kim +Reviewed-by: SeongJae Park +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + samples/damon/wsse.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/samples/damon/wsse.c b/samples/damon/wsse.c +index 11be25803274..e20238a249e7 100644 +--- a/samples/damon/wsse.c ++++ b/samples/damon/wsse.c +@@ -102,8 +102,12 @@ static int damon_sample_wsse_enable_store( + if (enable == enabled) + return 0; + +- if (enable) +- return damon_sample_wsse_start(); ++ if (enable) { ++ err = damon_sample_wsse_start(); ++ if (err) ++ enable = false; ++ return err; ++ } + damon_sample_wsse_stop(); + return 0; + } +-- +2.50.1 + diff --git a/queue-6.15/scripts-gdb-de-reference-per-cpu-mce-interrupts.patch b/queue-6.15/scripts-gdb-de-reference-per-cpu-mce-interrupts.patch new file mode 100644 index 0000000000..a6a0dd4cf1 --- /dev/null +++ b/queue-6.15/scripts-gdb-de-reference-per-cpu-mce-interrupts.patch @@ -0,0 +1,45 @@ +From 50f4d2ba26d5c3a4687ae0569be3bbf1c8f0cbed Mon Sep 17 00:00:00 2001 +From: Florian Fainelli +Date: Mon, 23 Jun 2025 20:00:19 -0700 +Subject: scripts/gdb: de-reference per-CPU MCE interrupts + +From: Florian Fainelli + +commit 50f4d2ba26d5c3a4687ae0569be3bbf1c8f0cbed upstream. 
+ +The per-CPU MCE interrupts are looked up by reference and need to be +de-referenced before printing, otherwise we print the addresses of the +variables instead of their contents: + +MCE: 18379471554386948492 Machine check exceptions +MCP: 18379471554386948488 Machine check polls + +The corrected output looks like this instead now: + +MCE: 0 Machine check exceptions +MCP: 1 Machine check polls + +Link: https://lkml.kernel.org/r/20250625021109.1057046-1-florian.fainelli@broadcom.com +Link: https://lkml.kernel.org/r/20250624030020.882472-1-florian.fainelli@broadcom.com +Fixes: b0969d7687a7 ("scripts/gdb: print interrupts") +Signed-off-by: Florian Fainelli +Cc: Jan Kiszka +Cc: Kieran Bingham +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + scripts/gdb/linux/interrupts.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/scripts/gdb/linux/interrupts.py ++++ b/scripts/gdb/linux/interrupts.py +@@ -110,7 +110,7 @@ def x86_show_mce(prec, var, pfx, desc): + pvar = gdb.parse_and_eval(var) + text = "%*s: " % (prec, pfx) + for cpu in cpus.each_online_cpu(): +- text += "%10u " % (cpus.per_cpu(pvar, cpu)) ++ text += "%10u " % (cpus.per_cpu(pvar, cpu).dereference()) + text += " %s\n" % (desc) + return text + diff --git a/queue-6.15/scripts-gdb-fix-interrupts-display-after-mcp-on-x86.patch b/queue-6.15/scripts-gdb-fix-interrupts-display-after-mcp-on-x86.patch new file mode 100644 index 0000000000..88aba99ed8 --- /dev/null +++ b/queue-6.15/scripts-gdb-fix-interrupts-display-after-mcp-on-x86.patch @@ -0,0 +1,35 @@ +From 7627b459aa0737bdd62a8591a1481cda467f20e3 Mon Sep 17 00:00:00 2001 +From: Florian Fainelli +Date: Mon, 23 Jun 2025 09:41:52 -0700 +Subject: scripts/gdb: fix interrupts display after MCP on x86 + +From: Florian Fainelli + +commit 7627b459aa0737bdd62a8591a1481cda467f20e3 upstream. + +The text line would not be appended to as it should have, it should have +been a '+=' but ended up being a '==', fix that. + +Link: https://lkml.kernel.org/r/20250623164153.746359-1-florian.fainelli@broadcom.com +Fixes: b0969d7687a7 ("scripts/gdb: print interrupts") +Signed-off-by: Florian Fainelli +Cc: Jan Kiszka +Cc: Kieran Bingham +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + scripts/gdb/linux/interrupts.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/scripts/gdb/linux/interrupts.py ++++ b/scripts/gdb/linux/interrupts.py +@@ -142,7 +142,7 @@ def x86_show_interupts(prec): + + if constants.LX_CONFIG_X86_MCE: + text += x86_show_mce(prec, "&mce_exception_count", "MCE", "Machine check exceptions") +- text == x86_show_mce(prec, "&mce_poll_count", "MCP", "Machine check polls") ++ text += x86_show_mce(prec, "&mce_poll_count", "MCP", "Machine check polls") + + text += show_irq_err_count(prec) + diff --git a/queue-6.15/scripts-gdb-fix-interrupts.py-after-maple-tree-conversion.patch b/queue-6.15/scripts-gdb-fix-interrupts.py-after-maple-tree-conversion.patch new file mode 100644 index 0000000000..7854beee07 --- /dev/null +++ b/queue-6.15/scripts-gdb-fix-interrupts.py-after-maple-tree-conversion.patch @@ -0,0 +1,389 @@ +From a02b0cde8ee515ee0c8efd33e7fbe6830c282e69 Mon Sep 17 00:00:00 2001 +From: Florian Fainelli +Date: Tue, 24 Jun 2025 19:10:20 -0700 +Subject: scripts/gdb: fix interrupts.py after maple tree conversion + +From: Florian Fainelli + +commit a02b0cde8ee515ee0c8efd33e7fbe6830c282e69 upstream. 
+ +In commit 721255b9826b ("genirq: Use a maple tree for interrupt descriptor +management"), the irq_desc_tree was replaced with a sparse_irqs tree using +a maple tree structure. Since the script looked for the irq_desc_tree +symbol which is no longer available, no interrupts would be printed and +the script output would not be useful anymore. + +In addition to looking up the correct symbol (sparse_irqs), a new module +(mapletree.py) is added whose mtree_load() implementation is largely +copied after the C version and uses the same variable and intermediate +function names wherever possible to ensure that both the C and Python +version be updated in the future. + +This restores the scripts' output to match that of /proc/interrupts. + +Link: https://lkml.kernel.org/r/20250625021020.1056930-1-florian.fainelli@broadcom.com +Fixes: 721255b9826b ("genirq: Use a maple tree for interrupt descriptor management") +Signed-off-by: Florian Fainelli +Cc: Jan Kiszka +Cc: Kieran Bingham +Cc: Shanker Donthineni +Cc: Thomas Gleinxer +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + scripts/gdb/linux/constants.py.in | 7 + + scripts/gdb/linux/interrupts.py | 12 - + scripts/gdb/linux/mapletree.py | 252 ++++++++++++++++++++++++++++++++++++++ + scripts/gdb/linux/xarray.py | 28 ++++ + 4 files changed, 293 insertions(+), 6 deletions(-) + create mode 100644 scripts/gdb/linux/mapletree.py + create mode 100644 scripts/gdb/linux/xarray.py + +--- a/scripts/gdb/linux/constants.py.in ++++ b/scripts/gdb/linux/constants.py.in +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -93,6 +94,12 @@ LX_GDBPARSED(RADIX_TREE_MAP_SIZE) + LX_GDBPARSED(RADIX_TREE_MAP_SHIFT) + LX_GDBPARSED(RADIX_TREE_MAP_MASK) + ++/* linux/maple_tree.h */ ++LX_VALUE(MAPLE_NODE_SLOTS) ++LX_VALUE(MAPLE_RANGE64_SLOTS) ++LX_VALUE(MAPLE_ARANGE64_SLOTS) ++LX_GDBPARSED(MAPLE_NODE_MASK) ++ + /* linux/vmalloc.h */ + LX_VALUE(VM_IOREMAP) + LX_VALUE(VM_ALLOC) +--- a/scripts/gdb/linux/interrupts.py ++++ b/scripts/gdb/linux/interrupts.py +@@ -7,7 +7,7 @@ import gdb + from linux import constants + from linux import cpus + from linux import utils +-from linux import radixtree ++from linux import mapletree + + irq_desc_type = utils.CachedType("struct irq_desc") + +@@ -23,12 +23,12 @@ def irqd_is_level(desc): + def show_irq_desc(prec, irq): + text = "" + +- desc = radixtree.lookup(gdb.parse_and_eval("&irq_desc_tree"), irq) ++ desc = mapletree.mtree_load(gdb.parse_and_eval("&sparse_irqs"), irq) + if desc is None: + return text + +- desc = desc.cast(irq_desc_type.get_type()) +- if desc is None: ++ desc = desc.cast(irq_desc_type.get_type().pointer()) ++ if desc == 0: + return text + + if irq_settings_is_hidden(desc): +@@ -221,8 +221,8 @@ class LxInterruptList(gdb.Command): + gdb.write("CPU%-8d" % cpu) + gdb.write("\n") + +- if utils.gdb_eval_or_none("&irq_desc_tree") is None: +- return ++ if utils.gdb_eval_or_none("&sparse_irqs") is None: ++ raise gdb.GdbError("Unable to find the sparse IRQ tree, is CONFIG_SPARSE_IRQ enabled?") + + for irq in range(nr_irqs): + gdb.write(show_irq_desc(prec, irq)) +--- /dev/null ++++ b/scripts/gdb/linux/mapletree.py +@@ -0,0 +1,252 @@ ++# SPDX-License-Identifier: GPL-2.0 ++# ++# Maple tree helpers ++# ++# Copyright (c) 2025 Broadcom ++# ++# Authors: ++# Florian Fainelli ++ ++import gdb ++ ++from linux import utils ++from linux import constants ++from linux import xarray ++ ++maple_tree_root_type = utils.CachedType("struct maple_tree") ++maple_node_type = 
utils.CachedType("struct maple_node") ++maple_enode_type = utils.CachedType("void") ++ ++maple_dense = 0 ++maple_leaf_64 = 1 ++maple_range_64 = 2 ++maple_arange_64 = 3 ++ ++class Mas(object): ++ ma_active = 0 ++ ma_start = 1 ++ ma_root = 2 ++ ma_none = 3 ++ ma_pause = 4 ++ ma_overflow = 5 ++ ma_underflow = 6 ++ ma_error = 7 ++ ++ def __init__(self, mt, first, end): ++ if mt.type == maple_tree_root_type.get_type().pointer(): ++ self.tree = mt.dereference() ++ elif mt.type != maple_tree_root_type.get_type(): ++ raise gdb.GdbError("must be {} not {}" ++ .format(maple_tree_root_type.get_type().pointer(), mt.type)) ++ self.tree = mt ++ self.index = first ++ self.last = end ++ self.node = None ++ self.status = self.ma_start ++ self.min = 0 ++ self.max = -1 ++ ++ def is_start(self): ++ # mas_is_start() ++ return self.status == self.ma_start ++ ++ def is_ptr(self): ++ # mas_is_ptr() ++ return self.status == self.ma_root ++ ++ def is_none(self): ++ # mas_is_none() ++ return self.status == self.ma_none ++ ++ def root(self): ++ # mas_root() ++ return self.tree['ma_root'].cast(maple_enode_type.get_type().pointer()) ++ ++ def start(self): ++ # mas_start() ++ if self.is_start() is False: ++ return None ++ ++ self.min = 0 ++ self.max = ~0 ++ ++ while True: ++ self.depth = 0 ++ root = self.root() ++ if xarray.xa_is_node(root): ++ self.depth = 0 ++ self.status = self.ma_active ++ self.node = mte_safe_root(root) ++ self.offset = 0 ++ if mte_dead_node(self.node) is True: ++ continue ++ ++ return None ++ ++ self.node = None ++ # Empty tree ++ if root is None: ++ self.status = self.ma_none ++ self.offset = constants.LX_MAPLE_NODE_SLOTS ++ return None ++ ++ # Single entry tree ++ self.status = self.ma_root ++ self.offset = constants.LX_MAPLE_NODE_SLOTS ++ ++ if self.index != 0: ++ return None ++ ++ return root ++ ++ return None ++ ++ def reset(self): ++ # mas_reset() ++ self.status = self.ma_start ++ self.node = None ++ ++def mte_safe_root(node): ++ if node.type != maple_enode_type.get_type().pointer(): ++ raise gdb.GdbError("{} must be {} not {}" ++ .format(mte_safe_root.__name__, maple_enode_type.get_type().pointer(), node.type)) ++ ulong_type = utils.get_ulong_type() ++ indirect_ptr = node.cast(ulong_type) & ~0x2 ++ val = indirect_ptr.cast(maple_enode_type.get_type().pointer()) ++ return val ++ ++def mte_node_type(entry): ++ ulong_type = utils.get_ulong_type() ++ val = None ++ if entry.type == maple_enode_type.get_type().pointer(): ++ val = entry.cast(ulong_type) ++ elif entry.type == ulong_type: ++ val = entry ++ else: ++ raise gdb.GdbError("{} must be {} not {}" ++ .format(mte_node_type.__name__, maple_enode_type.get_type().pointer(), entry.type)) ++ return (val >> 0x3) & 0xf ++ ++def ma_dead_node(node): ++ if node.type != maple_node_type.get_type().pointer(): ++ raise gdb.GdbError("{} must be {} not {}" ++ .format(ma_dead_node.__name__, maple_node_type.get_type().pointer(), node.type)) ++ ulong_type = utils.get_ulong_type() ++ parent = node['parent'] ++ indirect_ptr = node['parent'].cast(ulong_type) & ~constants.LX_MAPLE_NODE_MASK ++ return indirect_ptr == node ++ ++def mte_to_node(enode): ++ ulong_type = utils.get_ulong_type() ++ if enode.type == maple_enode_type.get_type().pointer(): ++ indirect_ptr = enode.cast(ulong_type) ++ elif enode.type == ulong_type: ++ indirect_ptr = enode ++ else: ++ raise gdb.GdbError("{} must be {} not {}" ++ .format(mte_to_node.__name__, maple_enode_type.get_type().pointer(), enode.type)) ++ indirect_ptr = indirect_ptr & ~constants.LX_MAPLE_NODE_MASK ++ return 
indirect_ptr.cast(maple_node_type.get_type().pointer()) ++ ++def mte_dead_node(enode): ++ if enode.type != maple_enode_type.get_type().pointer(): ++ raise gdb.GdbError("{} must be {} not {}" ++ .format(mte_dead_node.__name__, maple_enode_type.get_type().pointer(), enode.type)) ++ node = mte_to_node(enode) ++ return ma_dead_node(node) ++ ++def ma_is_leaf(tp): ++ result = tp < maple_range_64 ++ return tp < maple_range_64 ++ ++def mt_pivots(t): ++ if t == maple_dense: ++ return 0 ++ elif t == maple_leaf_64 or t == maple_range_64: ++ return constants.LX_MAPLE_RANGE64_SLOTS - 1 ++ elif t == maple_arange_64: ++ return constants.LX_MAPLE_ARANGE64_SLOTS - 1 ++ ++def ma_pivots(node, t): ++ if node.type != maple_node_type.get_type().pointer(): ++ raise gdb.GdbError("{}: must be {} not {}" ++ .format(ma_pivots.__name__, maple_node_type.get_type().pointer(), node.type)) ++ if t == maple_arange_64: ++ return node['ma64']['pivot'] ++ elif t == maple_leaf_64 or t == maple_range_64: ++ return node['mr64']['pivot'] ++ else: ++ return None ++ ++def ma_slots(node, tp): ++ if node.type != maple_node_type.get_type().pointer(): ++ raise gdb.GdbError("{}: must be {} not {}" ++ .format(ma_slots.__name__, maple_node_type.get_type().pointer(), node.type)) ++ if tp == maple_arange_64: ++ return node['ma64']['slot'] ++ elif tp == maple_range_64 or tp == maple_leaf_64: ++ return node['mr64']['slot'] ++ elif tp == maple_dense: ++ return node['slot'] ++ else: ++ return None ++ ++def mt_slot(mt, slots, offset): ++ ulong_type = utils.get_ulong_type() ++ return slots[offset].cast(ulong_type) ++ ++def mtree_lookup_walk(mas): ++ ulong_type = utils.get_ulong_type() ++ n = mas.node ++ ++ while True: ++ node = mte_to_node(n) ++ tp = mte_node_type(n) ++ pivots = ma_pivots(node, tp) ++ end = mt_pivots(tp) ++ offset = 0 ++ while True: ++ if pivots[offset] >= mas.index: ++ break ++ if offset >= end: ++ break ++ offset += 1 ++ ++ slots = ma_slots(node, tp) ++ n = mt_slot(mas.tree, slots, offset) ++ if ma_dead_node(node) is True: ++ mas.reset() ++ return None ++ break ++ ++ if ma_is_leaf(tp) is True: ++ break ++ ++ return n ++ ++def mtree_load(mt, index): ++ ulong_type = utils.get_ulong_type() ++ # MT_STATE(...) 
++ mas = Mas(mt, index, index) ++ entry = None ++ ++ while True: ++ entry = mas.start() ++ if mas.is_none(): ++ return None ++ ++ if mas.is_ptr(): ++ if index != 0: ++ entry = None ++ return entry ++ ++ entry = mtree_lookup_walk(mas) ++ if entry is None and mas.is_start(): ++ continue ++ else: ++ break ++ ++ if xarray.xa_is_zero(entry): ++ return None ++ ++ return entry +--- /dev/null ++++ b/scripts/gdb/linux/xarray.py +@@ -0,0 +1,28 @@ ++# SPDX-License-Identifier: GPL-2.0 ++# ++# Xarray helpers ++# ++# Copyright (c) 2025 Broadcom ++# ++# Authors: ++# Florian Fainelli ++ ++import gdb ++ ++from linux import utils ++from linux import constants ++ ++def xa_is_internal(entry): ++ ulong_type = utils.get_ulong_type() ++ return ((entry.cast(ulong_type) & 3) == 2) ++ ++def xa_mk_internal(v): ++ return ((v << 2) | 2) ++ ++def xa_is_zero(entry): ++ ulong_type = utils.get_ulong_type() ++ return entry.cast(ulong_type) == xa_mk_internal(257) ++ ++def xa_is_node(entry): ++ ulong_type = utils.get_ulong_type() ++ return xa_is_internal(entry) and (entry.cast(ulong_type) > 4096) diff --git a/queue-6.15/scripts-gdb-vfs-support-external-dentry-names.patch b/queue-6.15/scripts-gdb-vfs-support-external-dentry-names.patch new file mode 100644 index 0000000000..fdb243b325 --- /dev/null +++ b/queue-6.15/scripts-gdb-vfs-support-external-dentry-names.patch @@ -0,0 +1,47 @@ +From e6d3e653b084f003977bf2e33820cb84d2e4541f Mon Sep 17 00:00:00 2001 +From: Illia Ostapyshyn +Date: Sun, 29 Jun 2025 02:38:11 +0200 +Subject: scripts: gdb: vfs: support external dentry names + +From: Illia Ostapyshyn + +commit e6d3e653b084f003977bf2e33820cb84d2e4541f upstream. + +d_shortname of struct dentry only reserves D_NAME_INLINE_LEN characters +and contains garbage for longer names. Use d_name instead, which always +references the valid name. + +Link: https://lore.kernel.org/all/20250525213709.878287-2-illia@yshyn.com/ +Link: https://lkml.kernel.org/r/20250629003811.2420418-1-illia@yshyn.com +Fixes: 79300ac805b6 ("scripts/gdb: fix dentry_name() lookup") +Signed-off-by: Illia Ostapyshyn +Tested-by: Florian Fainelli +Reviewed-by: Florian Fainelli +Cc: Al Viro +Cc: Christian Brauner +Cc: Jan Kara +Cc: Jan Kiszka +Cc: Kieran Bingham +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + scripts/gdb/linux/vfs.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/scripts/gdb/linux/vfs.py b/scripts/gdb/linux/vfs.py +index b5fbb18ccb77..9e921b645a68 100644 +--- a/scripts/gdb/linux/vfs.py ++++ b/scripts/gdb/linux/vfs.py +@@ -22,7 +22,7 @@ def dentry_name(d): + if parent == d or parent == 0: + return "" + p = dentry_name(d['d_parent']) + "/" +- return p + d['d_shortname']['string'].string() ++ return p + d['d_name']['name'].string() + + class DentryName(gdb.Function): + """Return string of the full path of a dentry. 
+-- +2.50.1 + diff --git a/queue-6.15/series b/queue-6.15/series index a9d3f6dc47..bb14b582b2 100644 --- a/queue-6.15/series +++ b/queue-6.15/series @@ -95,3 +95,18 @@ drm-framebuffer-acquire-internal-references-on-gem-handles.patch drm-xe-allocate-pf-queue-size-on-pow2-boundary.patch revert-acpi-battery-negate-current-when-discharging.patch revert-pci-acpi-fix-allocated-memory-release-on-error-in-pci_acpi_scan_root.patch +kallsyms-fix-build-without-execinfo.patch +kasan-remove-kasan_find_vm_area-to-prevent-possible-deadlock.patch +maple_tree-fix-mt_destroy_walk-on-root-leaf-node.patch +mm-fix-the-inaccurate-memory-statistics-issue-for-users.patch +scripts-gdb-fix-interrupts-display-after-mcp-on-x86.patch +scripts-gdb-de-reference-per-cpu-mce-interrupts.patch +scripts-gdb-fix-interrupts.py-after-maple-tree-conversion.patch +scripts-gdb-vfs-support-external-dentry-names.patch +mm-vmalloc-leave-lazy-mmu-mode-on-pte-mapping-error.patch +mm-rmap-fix-potential-out-of-bounds-page-table-access-during-batched-unmap.patch +mm-damon-core-handle-damon_call_control-as-normal-under-kdmond-deactivation.patch +mm-damon-fix-divide-by-zero-in-damon_get_intervals_score.patch +samples-damon-fix-damon-sample-prcl-for-start-failure.patch +samples-damon-fix-damon-sample-wsse-for-start-failure.patch +lib-alloc_tag-do-not-acquire-non-existent-lock-in-alloc_tag_top_users.patch -- 2.47.2