From: Greg Kroah-Hartman Date: Wed, 21 Jan 2026 17:01:44 +0000 (+0100) Subject: 6.12-stable patches X-Git-Tag: v6.12.67~5 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=127f49a526980706b7331253886d0054342c7ce1;p=thirdparty%2Fkernel%2Fstable-queue.git 6.12-stable patches added patches: bpf-reject-narrower-access-to-pointer-ctx-fields.patch dmaengine-fsl-edma-fix-clk-leak-on-alloc_chan_resources-failure.patch mm-damon-sysfs-scheme-cleanup-access_pattern-subdirs-on-scheme-dir-setup-failure.patch mm-damon-sysfs-scheme-cleanup-quotas-subdirs-on-scheme-dir-setup-failure.patch mm-fake-numa-allow-later-numa-node-hotplug.patch mm-kmsan-fix-poisoning-of-high-order-non-compound-pages.patch mm-numa-memblock-include-asm-numa.h-for-numa_nodes_parsed.patch mm-page_alloc-batch-page-freeing-in-decay_pcp_high.patch mm-page_alloc-prevent-pcp-corruption-with-smp-n.patch mm-page_alloc-vmstat-simplify-refresh_cpu_vm_stats-change-detection.patch phy-phy-rockchip-inno-usb2-use-dev_err_probe-in-the-probe-path.patch phy-rockchip-inno-usb2-fix-a-double-free-bug-in-rockchip_usb2phy_probe.patch selftests-bpf-test-invalid-narrower-ctx-load.patch --- diff --git a/queue-6.12/bpf-reject-narrower-access-to-pointer-ctx-fields.patch b/queue-6.12/bpf-reject-narrower-access-to-pointer-ctx-fields.patch new file mode 100644 index 0000000000..11b41fd4cf --- /dev/null +++ b/queue-6.12/bpf-reject-narrower-access-to-pointer-ctx-fields.patch @@ -0,0 +1,164 @@ +From e09299225d5ba3916c91ef70565f7d2187e4cca0 Mon Sep 17 00:00:00 2001 +From: Paul Chaignon +Date: Tue, 22 Jul 2025 16:32:32 +0200 +Subject: bpf: Reject narrower access to pointer ctx fields + +From: Paul Chaignon + +commit e09299225d5ba3916c91ef70565f7d2187e4cca0 upstream. + +The following BPF program, simplified from a syzkaller repro, causes a +kernel warning: + + r0 = *(u8 *)(r1 + 169); + exit; + +With pointer field sk being at offset 168 in __sk_buff. This access is +detected as a narrower read in bpf_skb_is_valid_access because it +doesn't match offsetof(struct __sk_buff, sk). It is therefore allowed +and later proceeds to bpf_convert_ctx_access. Note that for the +"is_narrower_load" case in the convert_ctx_accesses(), the insn->off +is aligned, so the cnt may not be 0 because it matches the +offsetof(struct __sk_buff, sk) in the bpf_convert_ctx_access. However, +the target_size stays 0 and the verifier errors with a kernel warning: + + verifier bug: error during ctx access conversion(1) + +This patch fixes that to return a proper "invalid bpf_context access +off=X size=Y" error on the load instruction. + +The same issue affects multiple other fields in context structures that +allow narrow access. Some other non-affected fields (for sk_msg, +sk_lookup, and sockopt) were also changed to use bpf_ctx_range_ptr for +consistency. + +Note this syzkaller crash was reported in the "Closes" link below, which +used to be about a different bug, fixed in +commit fce7bd8e385a ("bpf/verifier: Handle BPF_LOAD_ACQ instructions +in insn_def_regno()"). Because syzbot somehow confused the two bugs, +the new crash and repro didn't get reported to the mailing list. 
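For readers of the hunks below: bpf_ctx_range_ptr() and bpf_ctx_range() are used as case labels, i.e. they expand to GCC case ranges covering every byte offset of the field, whereas a bare offsetof() label matches only the field's first byte. A rough sketch of the effect on the __sk_buff check, using the offsets from the report above (sk spans bytes 168-175), with the range comments being an inference rather than the exact macro bodies:

	switch (off) {
	...
	case bpf_ctx_range_ptr(struct __sk_buff, sk):	/* matches off 168 ... 175 */
		if (type == BPF_WRITE || size != sizeof(__u64))
			return false;	/* the 1-byte load at off 169 is rejected here */
		info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
		break;
	...
	}

With only the offsetof() label, the load at off 169 matched no case, was treated as an ordinary narrow read, and was allowed through to the ctx-conversion step described above.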
+ +Fixes: f96da09473b52 ("bpf: simplify narrower ctx access") +Fixes: 0df1a55afa832 ("bpf: Warn on internal verifier errors") +Reported-by: syzbot+0ef84a7bdf5301d4cbec@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=0ef84a7bdf5301d4cbec +Signed-off-by: Paul Chaignon +Signed-off-by: Martin KaFai Lau +Acked-by: Eduard Zingerman +Link: https://patch.msgid.link/3b8dcee67ff4296903351a974ddd9c4dca768b64.1753194596.git.paul.chaignon@gmail.com +Signed-off-by: Shung-Hsi Yu +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/cgroup.c | 8 ++++---- + net/core/filter.c | 20 ++++++++++---------- + 2 files changed, 14 insertions(+), 14 deletions(-) + +--- a/kernel/bpf/cgroup.c ++++ b/kernel/bpf/cgroup.c +@@ -2418,22 +2418,22 @@ static bool cg_sockopt_is_valid_access(i + } + + switch (off) { +- case offsetof(struct bpf_sockopt, sk): ++ case bpf_ctx_range_ptr(struct bpf_sockopt, sk): + if (size != sizeof(__u64)) + return false; + info->reg_type = PTR_TO_SOCKET; + break; +- case offsetof(struct bpf_sockopt, optval): ++ case bpf_ctx_range_ptr(struct bpf_sockopt, optval): + if (size != sizeof(__u64)) + return false; + info->reg_type = PTR_TO_PACKET; + break; +- case offsetof(struct bpf_sockopt, optval_end): ++ case bpf_ctx_range_ptr(struct bpf_sockopt, optval_end): + if (size != sizeof(__u64)) + return false; + info->reg_type = PTR_TO_PACKET_END; + break; +- case offsetof(struct bpf_sockopt, retval): ++ case bpf_ctx_range(struct bpf_sockopt, retval): + if (size != size_default) + return false; + return prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT; +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -8655,7 +8655,7 @@ static bool bpf_skb_is_valid_access(int + if (size != sizeof(__u64)) + return false; + break; +- case offsetof(struct __sk_buff, sk): ++ case bpf_ctx_range_ptr(struct __sk_buff, sk): + if (type == BPF_WRITE || size != sizeof(__u64)) + return false; + info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL; +@@ -9232,7 +9232,7 @@ static bool sock_addr_is_valid_access(in + return false; + } + break; +- case offsetof(struct bpf_sock_addr, sk): ++ case bpf_ctx_range_ptr(struct bpf_sock_addr, sk): + if (type != BPF_READ) + return false; + if (size != sizeof(__u64)) +@@ -9286,17 +9286,17 @@ static bool sock_ops_is_valid_access(int + if (size != sizeof(__u64)) + return false; + break; +- case offsetof(struct bpf_sock_ops, sk): ++ case bpf_ctx_range_ptr(struct bpf_sock_ops, sk): + if (size != sizeof(__u64)) + return false; + info->reg_type = PTR_TO_SOCKET_OR_NULL; + break; +- case offsetof(struct bpf_sock_ops, skb_data): ++ case bpf_ctx_range_ptr(struct bpf_sock_ops, skb_data): + if (size != sizeof(__u64)) + return false; + info->reg_type = PTR_TO_PACKET; + break; +- case offsetof(struct bpf_sock_ops, skb_data_end): ++ case bpf_ctx_range_ptr(struct bpf_sock_ops, skb_data_end): + if (size != sizeof(__u64)) + return false; + info->reg_type = PTR_TO_PACKET_END; +@@ -9305,7 +9305,7 @@ static bool sock_ops_is_valid_access(int + bpf_ctx_record_field_size(info, size_default); + return bpf_ctx_narrow_access_ok(off, size, + size_default); +- case offsetof(struct bpf_sock_ops, skb_hwtstamp): ++ case bpf_ctx_range(struct bpf_sock_ops, skb_hwtstamp): + if (size != sizeof(__u64)) + return false; + break; +@@ -9375,17 +9375,17 @@ static bool sk_msg_is_valid_access(int o + return false; + + switch (off) { +- case offsetof(struct sk_msg_md, data): ++ case bpf_ctx_range_ptr(struct sk_msg_md, data): + info->reg_type = PTR_TO_PACKET; + if (size != sizeof(__u64)) + return false; + break; +- case 
offsetof(struct sk_msg_md, data_end): ++ case bpf_ctx_range_ptr(struct sk_msg_md, data_end): + info->reg_type = PTR_TO_PACKET_END; + if (size != sizeof(__u64)) + return false; + break; +- case offsetof(struct sk_msg_md, sk): ++ case bpf_ctx_range_ptr(struct sk_msg_md, sk): + if (size != sizeof(__u64)) + return false; + info->reg_type = PTR_TO_SOCKET; +@@ -11598,7 +11598,7 @@ static bool sk_lookup_is_valid_access(in + return false; + + switch (off) { +- case offsetof(struct bpf_sk_lookup, sk): ++ case bpf_ctx_range_ptr(struct bpf_sk_lookup, sk): + info->reg_type = PTR_TO_SOCKET_OR_NULL; + return size == sizeof(__u64); + diff --git a/queue-6.12/dmaengine-fsl-edma-fix-clk-leak-on-alloc_chan_resources-failure.patch b/queue-6.12/dmaengine-fsl-edma-fix-clk-leak-on-alloc_chan_resources-failure.patch new file mode 100644 index 0000000000..94286cbcf2 --- /dev/null +++ b/queue-6.12/dmaengine-fsl-edma-fix-clk-leak-on-alloc_chan_resources-failure.patch @@ -0,0 +1,43 @@ +From stable+bounces-210735-greg=kroah.com@vger.kernel.org Wed Jan 21 13:06:07 2026 +From: Sasha Levin +Date: Wed, 21 Jan 2026 07:04:21 -0500 +Subject: dmaengine: fsl-edma: Fix clk leak on alloc_chan_resources failure +To: stable@vger.kernel.org +Cc: Zhen Ni , Frank Li , Vinod Koul , Sasha Levin +Message-ID: <20260121120421.1504853-1-sashal@kernel.org> + +From: Zhen Ni + +[ Upstream commit b18cd8b210417f90537d914ffb96e390c85a7379 ] + +When fsl_edma_alloc_chan_resources() fails after clk_prepare_enable(), +the error paths only free IRQs and destroy the TCD pool, but forget to +call clk_disable_unprepare(). This causes the channel clock to remain +enabled, leaking power and resources. + +Fix it by disabling the channel clock in the error unwind path. + +Fixes: d8d4355861d8 ("dmaengine: fsl-edma: add i.MX8ULP edma support") +Cc: stable@vger.kernel.org +Suggested-by: Frank Li +Signed-off-by: Zhen Ni +Reviewed-by: Frank Li +Link: https://patch.msgid.link/20251014090522.827726-1-zhen.ni@easystack.cn +Signed-off-by: Vinod Koul +[ Different error handling scheme ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/dma/fsl-edma-common.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/dma/fsl-edma-common.c ++++ b/drivers/dma/fsl-edma-common.c +@@ -819,6 +819,7 @@ int fsl_edma_alloc_chan_resources(struct + + if (ret) { + dma_pool_destroy(fsl_chan->tcd_pool); ++ clk_disable_unprepare(fsl_chan->clk); + return ret; + } + } diff --git a/queue-6.12/mm-damon-sysfs-scheme-cleanup-access_pattern-subdirs-on-scheme-dir-setup-failure.patch b/queue-6.12/mm-damon-sysfs-scheme-cleanup-access_pattern-subdirs-on-scheme-dir-setup-failure.patch new file mode 100644 index 0000000000..a8d04dfa8d --- /dev/null +++ b/queue-6.12/mm-damon-sysfs-scheme-cleanup-access_pattern-subdirs-on-scheme-dir-setup-failure.patch @@ -0,0 +1,49 @@ +From 392b3d9d595f34877dd745b470c711e8ebcd225c Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Wed, 24 Dec 2025 18:30:37 -0800 +Subject: mm/damon/sysfs-scheme: cleanup access_pattern subdirs on scheme dir setup failure + +From: SeongJae Park + +commit 392b3d9d595f34877dd745b470c711e8ebcd225c upstream. + +When a DAMOS-scheme DAMON sysfs directory setup fails after setup of +access_pattern/ directory, subdirectories of access_pattern/ directory are +not cleaned up. As a result, DAMON sysfs interface is nearly broken until +the system reboots, and the memory for the unremoved directory is leaked. + +Cleanup the directories under such failures. 
+ +Link: https://lkml.kernel.org/r/20251225023043.18579-5-sj@kernel.org +Fixes: 9bbb820a5bd5 ("mm/damon/sysfs: support DAMOS quotas") +Signed-off-by: SeongJae Park +Cc: chongjiapeng +Cc: # 5.18.x +Signed-off-by: Andrew Morton +Signed-off-by: SeongJae Park +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/sysfs-schemes.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/mm/damon/sysfs-schemes.c ++++ b/mm/damon/sysfs-schemes.c +@@ -1606,7 +1606,7 @@ static int damon_sysfs_scheme_add_dirs(s + return err; + err = damon_sysfs_scheme_set_quotas(scheme); + if (err) +- goto put_access_pattern_out; ++ goto rmdir_put_access_pattern_out; + err = damon_sysfs_scheme_set_watermarks(scheme); + if (err) + goto rmdir_put_quotas_access_pattern_out; +@@ -1634,7 +1634,8 @@ rmdir_put_quotas_access_pattern_out: + damon_sysfs_quotas_rm_dirs(scheme->quotas); + kobject_put(&scheme->quotas->kobj); + scheme->quotas = NULL; +-put_access_pattern_out: ++rmdir_put_access_pattern_out: ++ damon_sysfs_access_pattern_rm_dirs(scheme->access_pattern); + kobject_put(&scheme->access_pattern->kobj); + scheme->access_pattern = NULL; + return err; diff --git a/queue-6.12/mm-damon-sysfs-scheme-cleanup-quotas-subdirs-on-scheme-dir-setup-failure.patch b/queue-6.12/mm-damon-sysfs-scheme-cleanup-quotas-subdirs-on-scheme-dir-setup-failure.patch new file mode 100644 index 0000000000..70d682320b --- /dev/null +++ b/queue-6.12/mm-damon-sysfs-scheme-cleanup-quotas-subdirs-on-scheme-dir-setup-failure.patch @@ -0,0 +1,49 @@ +From dc7e1d75fd8c505096d0cddeca9e2efb2b55aaf9 Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Wed, 24 Dec 2025 18:30:36 -0800 +Subject: mm/damon/sysfs-scheme: cleanup quotas subdirs on scheme dir setup failure + +From: SeongJae Park + +commit dc7e1d75fd8c505096d0cddeca9e2efb2b55aaf9 upstream. + +When a DAMOS-scheme DAMON sysfs directory setup fails after setup of +quotas/ directory, subdirectories of quotas/ directory are not cleaned up. +As a result, DAMON sysfs interface is nearly broken until the system +reboots, and the memory for the unremoved directory is leaked. + +Cleanup the directories under such failures. 
+ +Link: https://lkml.kernel.org/r/20251225023043.18579-4-sj@kernel.org +Fixes: 1b32234ab087 ("mm/damon/sysfs: support DAMOS watermarks") +Signed-off-by: SeongJae Park +Cc: chongjiapeng +Cc: # 5.18.x +Signed-off-by: Andrew Morton +Signed-off-by: SeongJae Park +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/sysfs-schemes.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/mm/damon/sysfs-schemes.c ++++ b/mm/damon/sysfs-schemes.c +@@ -1609,7 +1609,7 @@ static int damon_sysfs_scheme_add_dirs(s + goto put_access_pattern_out; + err = damon_sysfs_scheme_set_watermarks(scheme); + if (err) +- goto put_quotas_access_pattern_out; ++ goto rmdir_put_quotas_access_pattern_out; + err = damon_sysfs_scheme_set_filters(scheme); + if (err) + goto put_watermarks_quotas_access_pattern_out; +@@ -1630,7 +1630,8 @@ put_filters_watermarks_quotas_access_pat + put_watermarks_quotas_access_pattern_out: + kobject_put(&scheme->watermarks->kobj); + scheme->watermarks = NULL; +-put_quotas_access_pattern_out: ++rmdir_put_quotas_access_pattern_out: ++ damon_sysfs_quotas_rm_dirs(scheme->quotas); + kobject_put(&scheme->quotas->kobj); + scheme->quotas = NULL; + put_access_pattern_out: diff --git a/queue-6.12/mm-fake-numa-allow-later-numa-node-hotplug.patch b/queue-6.12/mm-fake-numa-allow-later-numa-node-hotplug.patch new file mode 100644 index 0000000000..771afc2667 --- /dev/null +++ b/queue-6.12/mm-fake-numa-allow-later-numa-node-hotplug.patch @@ -0,0 +1,446 @@ +From stable+bounces-210672-greg=kroah.com@vger.kernel.org Wed Jan 21 04:07:06 2026 +From: Sasha Levin +Date: Tue, 20 Jan 2026 22:06:54 -0500 +Subject: mm/fake-numa: allow later numa node hotplug +To: stable@vger.kernel.org +Cc: Bruno Faccini , David Hildenbrand , John Hubbard , "Mike Rapoport (Microsoft)" , Zi Yan , Andrew Morton , Sasha Levin +Message-ID: <20260121030655.1173340-1-sashal@kernel.org> + +From: Bruno Faccini + +[ Upstream commit 63db8170bf34ce9e0763f87d993cf9b4c9002b09 ] + +Current fake-numa implementation prevents new Numa nodes to be later +hot-plugged by drivers. A common symptom of this limitation is the "node + was absent from the node_possible_map" message by associated warning +in mm/memory_hotplug.c: add_memory_resource(). + +This comes from the lack of remapping in both pxm_to_node_map[] and +node_to_pxm_map[] tables to take fake-numa nodes into account and thus +triggers collisions with original and physical nodes only-mapping that had +been determined from BIOS tables. + +This patch fixes this by doing the necessary node-ids translation in both +pxm_to_node_map[]/node_to_pxm_map[] tables. node_distance[] table has +also been fixed accordingly. + +Details: + +When trying to use fake-numa feature on our system where new Numa nodes +are being "hot-plugged" upon driver load, this fails with the following +type of message and warning with stack : + +node 8 was absent from the node_possible_map WARNING: CPU: 61 PID: 4259 at +mm/memory_hotplug.c:1506 add_memory_resource+0x3dc/0x418 + +This issue prevents the use of the fake-NUMA debug feature with the +system's full configuration, when it has proven to be sometimes extremely +useful for performance testing of multi-tasked, memory-bound applications, +as it enables better isolation of processes/ranks compared to fat NUMA +nodes. 
+ +Usual numactl output after driver has “hot-plugged”/unveiled some +new Numa nodes with and without memory : +$ numactl --hardware +available: 9 nodes (0-8) +node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 +21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 +43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 +65 66 67 68 69 70 71 +node 0 size: 490037 MB +node 0 free: 484432 MB +node 1 cpus: +node 1 size: 97280 MB +node 1 free: 97279 MB +node 2 cpus: +node 2 size: 0 MB +node 2 free: 0 MB +node 3 cpus: +node 3 size: 0 MB +node 3 free: 0 MB +node 4 cpus: +node 4 size: 0 MB +node 4 free: 0 MB +node 5 cpus: +node 5 size: 0 MB +node 5 free: 0 MB +node 6 cpus: +node 6 size: 0 MB +node 6 free: 0 MB +node 7 cpus: +node 7 size: 0 MB +node 7 free: 0 MB +node 8 cpus: +node 8 size: 0 MB +node 8 free: 0 MB +node distances: +node 0 1 2 3 4 5 6 7 8 + 0: 10 80 80 80 80 80 80 80 80 + 1: 80 10 255 255 255 255 255 255 255 + 2: 80 255 10 255 255 255 255 255 255 + 3: 80 255 255 10 255 255 255 255 255 + 4: 80 255 255 255 10 255 255 255 255 + 5: 80 255 255 255 255 10 255 255 255 + 6: 80 255 255 255 255 255 10 255 255 + 7: 80 255 255 255 255 255 255 10 255 + 8: 80 255 255 255 255 255 255 255 10 + +With recent M.Rapoport set of fake-numa patches in mm-everything +and using numa=fake=4 boot parameter : +$ numactl --hardware +available: 4 nodes (0-3) +node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 +21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 +43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 +65 66 67 68 69 70 71 +node 0 size: 122518 MB +node 0 free: 117141 MB +node 1 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 +21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 +43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 +65 66 67 68 69 70 71 +node 1 size: 219911 MB +node 1 free: 219751 MB +node 2 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 +21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 +43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 +65 66 67 68 69 70 71 +node 2 size: 122599 MB +node 2 free: 122541 MB +node 3 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 +21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 +43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 +65 66 67 68 69 70 71 +node 3 size: 122479 MB +node 3 free: 122408 MB +node distances: +node 0 1 2 3 + 0: 10 10 10 10 + 1: 10 10 10 10 + 2: 10 10 10 10 + 3: 10 10 10 10 + +With recent M.Rapoport set of fake-numa patches in mm-everything, +this patch on top, using numa=fake=4 boot parameter : +# numactl —hardware +available: 12 nodes (0-11) +node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 +21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 +43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 +65 66 67 68 69 70 71 +node 0 size: 122518 MB +node 0 free: 116429 MB +node 1 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 +21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 +43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 +65 66 67 68 69 70 71 +node 1 size: 122631 MB +node 1 free: 122576 MB +node 2 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 +21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 +43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 +65 66 67 68 69 70 71 +node 2 size: 122599 MB +node 2 free: 
122544 MB +node 3 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 +21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 +43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 +65 66 67 68 69 70 71 +node 3 size: 122479 MB +node 3 free: 122419 MB +node 4 cpus: +node 4 size: 97280 MB +node 4 free: 97279 MB +node 5 cpus: +node 5 size: 0 MB +node 5 free: 0 MB +node 6 cpus: +node 6 size: 0 MB +node 6 free: 0 MB +node 7 cpus: +node 7 size: 0 MB +node 7 free: 0 MB +node 8 cpus: +node 8 size: 0 MB +node 8 free: 0 MB +node 9 cpus: +node 9 size: 0 MB +node 9 free: 0 MB +node 10 cpus: +node 10 size: 0 MB +node 10 free: 0 MB +node 11 cpus: +node 11 size: 0 MB +node 11 free: 0 MB +node distances: +node 0 1 2 3 4 5 6 7 8 9 10 11 + 0: 10 10 10 10 80 80 80 80 80 80 80 80 + 1: 10 10 10 10 80 80 80 80 80 80 80 80 + 2: 10 10 10 10 80 80 80 80 80 80 80 80 + 3: 10 10 10 10 80 80 80 80 80 80 80 80 + 4: 80 80 80 80 10 255 255 255 255 255 255 255 + 5: 80 80 80 80 255 10 255 255 255 255 255 255 + 6: 80 80 80 80 255 255 10 255 255 255 255 255 + 7: 80 80 80 80 255 255 255 10 255 255 255 255 + 8: 80 80 80 80 255 255 255 255 10 255 255 255 + 9: 80 80 80 80 255 255 255 255 255 10 255 255 + 10: 80 80 80 80 255 255 255 255 255 255 10 255 + 11: 80 80 80 80 255 255 255 255 255 255 255 10 + +Link: https://lkml.kernel.org/r/20250106120659.359610-2-bfaccini@nvidia.com +Signed-off-by: Bruno Faccini +Cc: David Hildenbrand +Cc: John Hubbard +Cc: Mike Rapoport (Microsoft) +Cc: Zi Yan +Signed-off-by: Andrew Morton +Stable-dep-of: f46c26f1bcd9 ("mm: numa,memblock: include for 'numa_nodes_parsed'") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/acpi/numa/srat.c | 86 +++++++++++++++++++++++++++++++++++++++++++ + include/acpi/acpi_numa.h | 5 ++ + include/linux/numa_memblks.h | 3 + + mm/numa_emulation.c | 45 +++++++++++++++++++--- + mm/numa_memblks.c | 2 - + 5 files changed, 133 insertions(+), 8 deletions(-) + +--- a/drivers/acpi/numa/srat.c ++++ b/drivers/acpi/numa/srat.c +@@ -81,6 +81,92 @@ int acpi_map_pxm_to_node(int pxm) + } + EXPORT_SYMBOL(acpi_map_pxm_to_node); + ++#ifdef CONFIG_NUMA_EMU ++/* ++ * Take max_nid - 1 fake-numa nodes into account in both ++ * pxm_to_node_map()/node_to_pxm_map[] tables. ++ */ ++int __init fix_pxm_node_maps(int max_nid) ++{ ++ static int pxm_to_node_map_copy[MAX_PXM_DOMAINS] __initdata ++ = { [0 ... MAX_PXM_DOMAINS - 1] = NUMA_NO_NODE }; ++ static int node_to_pxm_map_copy[MAX_NUMNODES] __initdata ++ = { [0 ... 
MAX_NUMNODES - 1] = PXM_INVAL }; ++ int i, j, index = -1, count = 0; ++ nodemask_t nodes_to_enable; ++ ++ if (numa_off || srat_disabled()) ++ return -1; ++ ++ /* find fake nodes PXM mapping */ ++ for (i = 0; i < MAX_NUMNODES; i++) { ++ if (node_to_pxm_map[i] != PXM_INVAL) { ++ for (j = 0; j <= max_nid; j++) { ++ if ((emu_nid_to_phys[j] == i) && ++ WARN(node_to_pxm_map_copy[j] != PXM_INVAL, ++ "Node %d is already binded to PXM %d\n", ++ j, node_to_pxm_map_copy[j])) ++ return -1; ++ if (emu_nid_to_phys[j] == i) { ++ node_to_pxm_map_copy[j] = ++ node_to_pxm_map[i]; ++ if (j > index) ++ index = j; ++ count++; ++ } ++ } ++ } ++ } ++ if (WARN(index != max_nid, "%d max nid when expected %d\n", ++ index, max_nid)) ++ return -1; ++ ++ nodes_clear(nodes_to_enable); ++ ++ /* map phys nodes not used for fake nodes */ ++ for (i = 0; i < MAX_NUMNODES; i++) { ++ if (node_to_pxm_map[i] != PXM_INVAL) { ++ for (j = 0; j <= max_nid; j++) ++ if (emu_nid_to_phys[j] == i) ++ break; ++ /* fake nodes PXM mapping has been done */ ++ if (j <= max_nid) ++ continue; ++ /* find first hole */ ++ for (j = 0; ++ j < MAX_NUMNODES && ++ node_to_pxm_map_copy[j] != PXM_INVAL; ++ j++) ++ ; ++ if (WARN(j == MAX_NUMNODES, ++ "Number of nodes exceeds MAX_NUMNODES\n")) ++ return -1; ++ node_to_pxm_map_copy[j] = node_to_pxm_map[i]; ++ node_set(j, nodes_to_enable); ++ count++; ++ } ++ } ++ ++ /* creating reverse mapping in pxm_to_node_map[] */ ++ for (i = 0; i < MAX_NUMNODES; i++) ++ if (node_to_pxm_map_copy[i] != PXM_INVAL && ++ pxm_to_node_map_copy[node_to_pxm_map_copy[i]] == NUMA_NO_NODE) ++ pxm_to_node_map_copy[node_to_pxm_map_copy[i]] = i; ++ ++ /* overwrite with new mapping */ ++ for (i = 0; i < MAX_NUMNODES; i++) { ++ node_to_pxm_map[i] = node_to_pxm_map_copy[i]; ++ pxm_to_node_map[i] = pxm_to_node_map_copy[i]; ++ } ++ ++ /* enable other nodes found in PXM for hotplug */ ++ nodes_or(numa_nodes_parsed, nodes_to_enable, numa_nodes_parsed); ++ ++ pr_debug("found %d total number of nodes\n", count); ++ return 0; ++} ++#endif ++ + static void __init + acpi_table_print_srat_entry(struct acpi_subtable_header *header) + { +--- a/include/acpi/acpi_numa.h ++++ b/include/acpi/acpi_numa.h +@@ -17,11 +17,16 @@ extern int node_to_pxm(int); + extern int acpi_map_pxm_to_node(int); + extern unsigned char acpi_srat_revision; + extern void disable_srat(void); ++extern int fix_pxm_node_maps(int max_nid); + + extern void bad_srat(void); + extern int srat_disabled(void); + + #else /* CONFIG_ACPI_NUMA */ ++static inline int fix_pxm_node_maps(int max_nid) ++{ ++ return 0; ++} + static inline void disable_srat(void) + { + } +--- a/include/linux/numa_memblks.h ++++ b/include/linux/numa_memblks.h +@@ -29,7 +29,10 @@ int __init numa_cleanup_meminfo(struct n + int __init numa_memblks_init(int (*init_func)(void), + bool memblock_force_top_down); + ++extern int numa_distance_cnt; ++ + #ifdef CONFIG_NUMA_EMU ++extern int emu_nid_to_phys[MAX_NUMNODES]; + int numa_emu_cmdline(char *str); + void __init numa_emu_update_cpu_to_node(int *emu_nid_to_phys, + unsigned int nr_emu_nids); +--- a/mm/numa_emulation.c ++++ b/mm/numa_emulation.c +@@ -8,11 +8,12 @@ + #include + #include + #include ++#include + + #define FAKE_NODE_MIN_SIZE ((u64)32 << 20) + #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) + +-static int emu_nid_to_phys[MAX_NUMNODES]; ++int emu_nid_to_phys[MAX_NUMNODES]; + static char *emu_cmdline __initdata; + + int __init numa_emu_cmdline(char *str) +@@ -379,6 +380,7 @@ void __init numa_emulation(struct numa_m + size_t phys_size = 
numa_dist_cnt * numa_dist_cnt * sizeof(phys_dist[0]); + int max_emu_nid, dfl_phys_nid; + int i, j, ret; ++ nodemask_t physnode_mask = numa_nodes_parsed; + + if (!emu_cmdline) + goto no_emu; +@@ -395,7 +397,6 @@ void __init numa_emulation(struct numa_m + * split the system RAM into N fake nodes. + */ + if (strchr(emu_cmdline, 'U')) { +- nodemask_t physnode_mask = numa_nodes_parsed; + unsigned long n; + int nid = 0; + +@@ -465,9 +466,6 @@ void __init numa_emulation(struct numa_m + */ + max_emu_nid = setup_emu2phys_nid(&dfl_phys_nid); + +- /* commit */ +- *numa_meminfo = ei; +- + /* Make sure numa_nodes_parsed only contains emulated nodes */ + nodes_clear(numa_nodes_parsed); + for (i = 0; i < ARRAY_SIZE(ei.blk); i++) +@@ -475,10 +473,21 @@ void __init numa_emulation(struct numa_m + ei.blk[i].nid != NUMA_NO_NODE) + node_set(ei.blk[i].nid, numa_nodes_parsed); + +- numa_emu_update_cpu_to_node(emu_nid_to_phys, ARRAY_SIZE(emu_nid_to_phys)); ++ /* fix pxm_to_node_map[] and node_to_pxm_map[] to avoid collision ++ * with faked numa nodes, particularly during later memory hotplug ++ * handling, and also update numa_nodes_parsed accordingly. ++ */ ++ ret = fix_pxm_node_maps(max_emu_nid); ++ if (ret < 0) ++ goto no_emu; ++ ++ /* commit */ ++ *numa_meminfo = ei; ++ ++ numa_emu_update_cpu_to_node(emu_nid_to_phys, max_emu_nid + 1); + + /* make sure all emulated nodes are mapped to a physical node */ +- for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) ++ for (i = 0; i < max_emu_nid + 1; i++) + if (emu_nid_to_phys[i] == NUMA_NO_NODE) + emu_nid_to_phys[i] = dfl_phys_nid; + +@@ -501,12 +510,34 @@ void __init numa_emulation(struct numa_m + numa_set_distance(i, j, dist); + } + } ++ for (i = 0; i < numa_distance_cnt; i++) { ++ for (j = 0; j < numa_distance_cnt; j++) { ++ int physi, physj; ++ u8 dist; ++ ++ /* distance between fake nodes is already ok */ ++ if (emu_nid_to_phys[i] != NUMA_NO_NODE && ++ emu_nid_to_phys[j] != NUMA_NO_NODE) ++ continue; ++ if (emu_nid_to_phys[i] != NUMA_NO_NODE) ++ physi = emu_nid_to_phys[i]; ++ else ++ physi = i - max_emu_nid; ++ if (emu_nid_to_phys[j] != NUMA_NO_NODE) ++ physj = emu_nid_to_phys[j]; ++ else ++ physj = j - max_emu_nid; ++ dist = phys_dist[physi * numa_dist_cnt + physj]; ++ numa_set_distance(i, j, dist); ++ } ++ } + + /* free the copied physical distance table */ + memblock_free(phys_dist, phys_size); + return; + + no_emu: ++ numa_nodes_parsed = physnode_mask; + /* No emulation. 
Build identity emu_nid_to_phys[] for numa_add_cpu() */ + for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) + emu_nid_to_phys[i] = i; +--- a/mm/numa_memblks.c ++++ b/mm/numa_memblks.c +@@ -7,7 +7,7 @@ + #include + #include + +-static int numa_distance_cnt; ++int numa_distance_cnt; + static u8 *numa_distance; + + nodemask_t numa_nodes_parsed __initdata; diff --git a/queue-6.12/mm-kmsan-fix-poisoning-of-high-order-non-compound-pages.patch b/queue-6.12/mm-kmsan-fix-poisoning-of-high-order-non-compound-pages.patch new file mode 100644 index 0000000000..c7e06b73d7 --- /dev/null +++ b/queue-6.12/mm-kmsan-fix-poisoning-of-high-order-non-compound-pages.patch @@ -0,0 +1,55 @@ +From stable+bounces-210674-greg=kroah.com@vger.kernel.org Wed Jan 21 04:14:40 2026 +From: Sasha Levin +Date: Tue, 20 Jan 2026 22:14:29 -0500 +Subject: mm: kmsan: fix poisoning of high-order non-compound pages +To: stable@vger.kernel.org +Cc: Ryan Roberts , Alexander Potapenko , Dmitriy Vyukov , Marco Elver , Andrew Morton , Sasha Levin +Message-ID: <20260121031429.1186608-1-sashal@kernel.org> + +From: Ryan Roberts + +[ Upstream commit 4795d205d78690a46b60164f44b8bb7b3e800865 ] + +kmsan_free_page() is called by the page allocator's free_pages_prepare() +during page freeing. Its job is to poison all the memory covered by the +page. It can be called with an order-0 page, a compound high-order page +or a non-compound high-order page. But page_size() only works for order-0 +and compound pages. For a non-compound high-order page it will +incorrectly return PAGE_SIZE. + +The implication is that the tail pages of a high-order non-compound page +do not get poisoned at free, so any invalid access while they are free +could go unnoticed. It looks like the pages will be poisoned again at +allocation time, so that would bookend the window. + +Fix this by using the order parameter to calculate the size. 
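As a concrete illustration (assuming 4 KiB pages), freeing a non-compound order-3 allocation:

	page_size(page)     = 4096                  (no compound metadata, so only the head page)
	PAGE_SIZE << order  = 4096 << 3 = 32768     (the whole allocation, all 8 pages)

so with the order-based size every page of the allocation is poisoned on free, not just the first.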
+ +Link: https://lkml.kernel.org/r/20260104134348.3544298-1-ryan.roberts@arm.com +Fixes: b073d7f8aee4 ("mm: kmsan: maintain KMSAN metadata for page operations") +Signed-off-by: Ryan Roberts +Reviewed-by: Alexander Potapenko +Tested-by: Alexander Potapenko +Cc: Dmitriy Vyukov +Cc: Dmitry Vyukov +Cc: Marco Elver +Cc: Ryan Roberts +Cc: +Signed-off-by: Andrew Morton +[ Adjust context ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + mm/kmsan/shadow.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/kmsan/shadow.c ++++ b/mm/kmsan/shadow.c +@@ -208,7 +208,7 @@ void kmsan_free_page(struct page *page, + return; + kmsan_enter_runtime(); + kmsan_internal_poison_memory(page_address(page), +- page_size(page), ++ PAGE_SIZE << order, + GFP_KERNEL, + KMSAN_POISON_CHECK | KMSAN_POISON_FREE); + kmsan_leave_runtime(); diff --git a/queue-6.12/mm-numa-memblock-include-asm-numa.h-for-numa_nodes_parsed.patch b/queue-6.12/mm-numa-memblock-include-asm-numa.h-for-numa_nodes_parsed.patch new file mode 100644 index 0000000000..77c74aa42c --- /dev/null +++ b/queue-6.12/mm-numa-memblock-include-asm-numa.h-for-numa_nodes_parsed.patch @@ -0,0 +1,42 @@ +From stable+bounces-210673-greg=kroah.com@vger.kernel.org Wed Jan 21 04:07:09 2026 +From: Sasha Levin +Date: Tue, 20 Jan 2026 22:06:55 -0500 +Subject: mm: numa,memblock: include for 'numa_nodes_parsed' +To: stable@vger.kernel.org +Cc: Ben Dooks , "Mike Rapoport (Microsoft)" , Andrew Morton , Sasha Levin +Message-ID: <20260121030655.1173340-2-sashal@kernel.org> + +From: Ben Dooks + +[ Upstream commit f46c26f1bcd9164d7f3377f15ca75488a3e44362 ] + +The 'numa_nodes_parsed' is defined in but this file +is not included in mm/numa_memblks.c (build x86_64) so add this +to the incldues to fix the following sparse warning: + +mm/numa_memblks.c:13:12: warning: symbol 'numa_nodes_parsed' was not declared. Should it be static? + +Link: https://lkml.kernel.org/r/20260108101539.229192-1-ben.dooks@codethink.co.uk +Fixes: 87482708210f ("mm: introduce numa_memblks") +Signed-off-by: Ben Dooks +Reviewed-by: Mike Rapoport (Microsoft) +Cc: Ben Dooks +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + mm/numa_memblks.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/mm/numa_memblks.c ++++ b/mm/numa_memblks.c +@@ -7,6 +7,8 @@ + #include + #include + ++#include ++ + int numa_distance_cnt; + static u8 *numa_distance; + diff --git a/queue-6.12/mm-page_alloc-batch-page-freeing-in-decay_pcp_high.patch b/queue-6.12/mm-page_alloc-batch-page-freeing-in-decay_pcp_high.patch new file mode 100644 index 0000000000..a2f9b3866c --- /dev/null +++ b/queue-6.12/mm-page_alloc-batch-page-freeing-in-decay_pcp_high.patch @@ -0,0 +1,69 @@ +From stable+bounces-210731-greg=kroah.com@vger.kernel.org Wed Jan 21 12:44:11 2026 +From: Sasha Levin +Date: Wed, 21 Jan 2026 06:28:07 -0500 +Subject: mm/page_alloc: batch page freeing in decay_pcp_high +To: stable@vger.kernel.org +Cc: Joshua Hahn , Chris Mason , Andrew Morton , Johannes Weiner , Vlastimil Babka , Brendan Jackman , "Kirill A. Shutemov" , Michal Hocko , SeongJae Park , Suren Baghdasaryan , Zi Yan , Sasha Levin +Message-ID: <20260121112808.1461983-2-sashal@kernel.org> + +From: Joshua Hahn + +[ Upstream commit fc4b909c368f3a7b08c895dd5926476b58e85312 ] + +It is possible for pcp->count - pcp->high to exceed pcp->batch by a lot. 
+When this happens, we should perform batching to ensure that +free_pcppages_bulk isn't called with too many pages to free at once and +starve out other threads that need the pcp or zone lock. + +Since we are still only freeing the difference between the initial +pcp->count and pcp->high values, there should be no change to how many +pages are freed. + +Link: https://lkml.kernel.org/r/20251014145011.3427205-3-joshua.hahnjy@gmail.com +Signed-off-by: Joshua Hahn +Suggested-by: Chris Mason +Suggested-by: Andrew Morton +Co-developed-by: Johannes Weiner +Reviewed-by: Vlastimil Babka +Cc: Brendan Jackman +Cc: "Kirill A. Shutemov" +Cc: Michal Hocko +Cc: SeongJae Park +Cc: Suren Baghdasaryan +Cc: Zi Yan +Signed-off-by: Andrew Morton +Stable-dep-of: 038a102535eb ("mm/page_alloc: prevent pcp corruption with SMP=n") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_alloc.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -2365,7 +2365,7 @@ static int rmqueue_bulk(struct zone *zon + */ + bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp) + { +- int high_min, to_drain, batch; ++ int high_min, to_drain, to_drain_batched, batch; + bool todo = false; + + high_min = READ_ONCE(pcp->high_min); +@@ -2383,11 +2383,14 @@ bool decay_pcp_high(struct zone *zone, s + } + + to_drain = pcp->count - pcp->high; +- if (to_drain > 0) { ++ while (to_drain > 0) { ++ to_drain_batched = min(to_drain, batch); + spin_lock(&pcp->lock); +- free_pcppages_bulk(zone, to_drain, pcp, 0); ++ free_pcppages_bulk(zone, to_drain_batched, pcp, 0); + spin_unlock(&pcp->lock); + todo = true; ++ ++ to_drain -= to_drain_batched; + } + + return todo; diff --git a/queue-6.12/mm-page_alloc-prevent-pcp-corruption-with-smp-n.patch b/queue-6.12/mm-page_alloc-prevent-pcp-corruption-with-smp-n.patch new file mode 100644 index 0000000000..7495cddf92 --- /dev/null +++ b/queue-6.12/mm-page_alloc-prevent-pcp-corruption-with-smp-n.patch @@ -0,0 +1,213 @@ +From stable+bounces-210732-greg=kroah.com@vger.kernel.org Wed Jan 21 12:32:45 2026 +From: Sasha Levin +Date: Wed, 21 Jan 2026 06:28:08 -0500 +Subject: mm/page_alloc: prevent pcp corruption with SMP=n +To: stable@vger.kernel.org +Cc: Vlastimil Babka , kernel test robot , Matthew Wilcox , Mel Gorman , Brendan Jackman , Johannes Weiner , Michal Hocko , Sebastian Andrzej Siewior , Steven Rostedt , Suren Baghdasaryan , Zi Yan , Andrew Morton , Sasha Levin +Message-ID: <20260121112808.1461983-3-sashal@kernel.org> + +From: Vlastimil Babka + +[ Upstream commit 038a102535eb49e10e93eafac54352fcc5d78847 ] + +The kernel test robot has reported: + + BUG: spinlock trylock failure on UP on CPU#0, kcompactd0/28 + lock: 0xffff888807e35ef0, .magic: dead4ead, .owner: kcompactd0/28, .owner_cpu: 0 + CPU: 0 UID: 0 PID: 28 Comm: kcompactd0 Not tainted 6.18.0-rc5-00127-ga06157804399 #1 PREEMPT 8cc09ef94dcec767faa911515ce9e609c45db470 + Call Trace: + + __dump_stack (lib/dump_stack.c:95) + dump_stack_lvl (lib/dump_stack.c:123) + dump_stack (lib/dump_stack.c:130) + spin_dump (kernel/locking/spinlock_debug.c:71) + do_raw_spin_trylock (kernel/locking/spinlock_debug.c:?) + _raw_spin_trylock (include/linux/spinlock_api_smp.h:89 kernel/locking/spinlock.c:138) + __free_frozen_pages (mm/page_alloc.c:2973) + ___free_pages (mm/page_alloc.c:5295) + __free_pages (mm/page_alloc.c:5334) + tlb_remove_table_rcu (include/linux/mm.h:? include/linux/mm.h:3122 include/asm-generic/tlb.h:220 mm/mmu_gather.c:227 mm/mmu_gather.c:290) + ? 
__cfi_tlb_remove_table_rcu (mm/mmu_gather.c:289) + ? rcu_core (kernel/rcu/tree.c:?) + rcu_core (include/linux/rcupdate.h:341 kernel/rcu/tree.c:2607 kernel/rcu/tree.c:2861) + rcu_core_si (kernel/rcu/tree.c:2879) + handle_softirqs (arch/x86/include/asm/jump_label.h:36 include/trace/events/irq.h:142 kernel/softirq.c:623) + __irq_exit_rcu (arch/x86/include/asm/jump_label.h:36 kernel/softirq.c:725) + irq_exit_rcu (kernel/softirq.c:741) + sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1052) + + + RIP: 0010:_raw_spin_unlock_irqrestore (arch/x86/include/asm/preempt.h:95 include/linux/spinlock_api_smp.h:152 kernel/locking/spinlock.c:194) + free_pcppages_bulk (mm/page_alloc.c:1494) + drain_pages_zone (include/linux/spinlock.h:391 mm/page_alloc.c:2632) + __drain_all_pages (mm/page_alloc.c:2731) + drain_all_pages (mm/page_alloc.c:2747) + kcompactd (mm/compaction.c:3115) + kthread (kernel/kthread.c:465) + ? __cfi_kcompactd (mm/compaction.c:3166) + ? __cfi_kthread (kernel/kthread.c:412) + ret_from_fork (arch/x86/kernel/process.c:164) + ? __cfi_kthread (kernel/kthread.c:412) + ret_from_fork_asm (arch/x86/entry/entry_64.S:255) + + +Matthew has analyzed the report and identified that in drain_page_zone() +we are in a section protected by spin_lock(&pcp->lock) and then get an +interrupt that attempts spin_trylock() on the same lock. The code is +designed to work this way without disabling IRQs and occasionally fail the +trylock with a fallback. However, the SMP=n spinlock implementation +assumes spin_trylock() will always succeed, and thus it's normally a +no-op. Here the enabled lock debugging catches the problem, but otherwise +it could cause a corruption of the pcp structure. + +The problem has been introduced by commit 574907741599 ("mm/page_alloc: +leave IRQs enabled for per-cpu page allocations"). The pcp locking scheme +recognizes the need for disabling IRQs to prevent nesting spin_trylock() +sections on SMP=n, but the need to prevent the nesting in spin_lock() has +not been recognized. Fix it by introducing local wrappers that change the +spin_lock() to spin_lock_iqsave() with SMP=n and use them in all places +that do spin_lock(&pcp->lock). + +[vbabka@suse.cz: add pcp_ prefix to the spin_lock_irqsave wrappers, per Steven] +Link: https://lkml.kernel.org/r/20260105-fix-pcp-up-v1-1-5579662d2071@suse.cz +Fixes: 574907741599 ("mm/page_alloc: leave IRQs enabled for per-cpu page allocations") +Signed-off-by: Vlastimil Babka +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-lkp/202512101320.e2f2dd6f-lkp@intel.com +Analyzed-by: Matthew Wilcox +Link: https://lore.kernel.org/all/aUW05pyc9nZkvY-1@casper.infradead.org/ +Acked-by: Mel Gorman +Cc: Brendan Jackman +Cc: Johannes Weiner +Cc: Michal Hocko +Cc: Sebastian Andrzej Siewior +Cc: Steven Rostedt +Cc: Suren Baghdasaryan +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_alloc.c | 47 +++++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 39 insertions(+), 8 deletions(-) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -163,6 +163,33 @@ static DEFINE_MUTEX(pcp_batch_high_lock) + #define pcp_spin_unlock(ptr) \ + pcpu_spin_unlock(lock, ptr) + ++/* ++ * With the UP spinlock implementation, when we spin_lock(&pcp->lock) (for i.e. ++ * a potentially remote cpu drain) and get interrupted by an operation that ++ * attempts pcp_spin_trylock(), we can't rely on the trylock failure due to UP ++ * spinlock assumptions making the trylock a no-op. 
So we have to turn that ++ * spin_lock() to a spin_lock_irqsave(). This works because on UP there are no ++ * remote cpu's so we can only be locking the only existing local one. ++ */ ++#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) ++static inline void __flags_noop(unsigned long *flags) { } ++#define pcp_spin_lock_maybe_irqsave(ptr, flags) \ ++({ \ ++ __flags_noop(&(flags)); \ ++ spin_lock(&(ptr)->lock); \ ++}) ++#define pcp_spin_unlock_maybe_irqrestore(ptr, flags) \ ++({ \ ++ spin_unlock(&(ptr)->lock); \ ++ __flags_noop(&(flags)); \ ++}) ++#else ++#define pcp_spin_lock_maybe_irqsave(ptr, flags) \ ++ spin_lock_irqsave(&(ptr)->lock, flags) ++#define pcp_spin_unlock_maybe_irqrestore(ptr, flags) \ ++ spin_unlock_irqrestore(&(ptr)->lock, flags) ++#endif ++ + #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID + DEFINE_PER_CPU(int, numa_node); + EXPORT_PER_CPU_SYMBOL(numa_node); +@@ -2366,6 +2393,7 @@ static int rmqueue_bulk(struct zone *zon + bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp) + { + int high_min, to_drain, to_drain_batched, batch; ++ unsigned long UP_flags; + bool todo = false; + + high_min = READ_ONCE(pcp->high_min); +@@ -2385,9 +2413,9 @@ bool decay_pcp_high(struct zone *zone, s + to_drain = pcp->count - pcp->high; + while (to_drain > 0) { + to_drain_batched = min(to_drain, batch); +- spin_lock(&pcp->lock); ++ pcp_spin_lock_maybe_irqsave(pcp, UP_flags); + free_pcppages_bulk(zone, to_drain_batched, pcp, 0); +- spin_unlock(&pcp->lock); ++ pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags); + todo = true; + + to_drain -= to_drain_batched; +@@ -2404,14 +2432,15 @@ bool decay_pcp_high(struct zone *zone, s + */ + void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) + { ++ unsigned long UP_flags; + int to_drain, batch; + + batch = READ_ONCE(pcp->batch); + to_drain = min(pcp->count, batch); + if (to_drain > 0) { +- spin_lock(&pcp->lock); ++ pcp_spin_lock_maybe_irqsave(pcp, UP_flags); + free_pcppages_bulk(zone, to_drain, pcp, 0); +- spin_unlock(&pcp->lock); ++ pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags); + } + } + #endif +@@ -2422,10 +2451,11 @@ void drain_zone_pages(struct zone *zone, + static void drain_pages_zone(unsigned int cpu, struct zone *zone) + { + struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); ++ unsigned long UP_flags; + int count; + + do { +- spin_lock(&pcp->lock); ++ pcp_spin_lock_maybe_irqsave(pcp, UP_flags); + count = pcp->count; + if (count) { + int to_drain = min(count, +@@ -2434,7 +2464,7 @@ static void drain_pages_zone(unsigned in + free_pcppages_bulk(zone, to_drain, pcp, 0); + count -= to_drain; + } +- spin_unlock(&pcp->lock); ++ pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags); + } while (count); + } + +@@ -5795,6 +5825,7 @@ static void zone_pcp_update_cacheinfo(st + { + struct per_cpu_pages *pcp; + struct cpu_cacheinfo *cci; ++ unsigned long UP_flags; + + pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); + cci = get_cpu_cacheinfo(cpu); +@@ -5805,12 +5836,12 @@ static void zone_pcp_update_cacheinfo(st + * This can reduce zone lock contention without hurting + * cache-hot pages sharing. 
+ */ +- spin_lock(&pcp->lock); ++ pcp_spin_lock_maybe_irqsave(pcp, UP_flags); + if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch) + pcp->flags |= PCPF_FREE_HIGH_BATCH; + else + pcp->flags &= ~PCPF_FREE_HIGH_BATCH; +- spin_unlock(&pcp->lock); ++ pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags); + } + + void setup_pcp_cacheinfo(unsigned int cpu) diff --git a/queue-6.12/mm-page_alloc-vmstat-simplify-refresh_cpu_vm_stats-change-detection.patch b/queue-6.12/mm-page_alloc-vmstat-simplify-refresh_cpu_vm_stats-change-detection.patch new file mode 100644 index 0000000000..dd26c36211 --- /dev/null +++ b/queue-6.12/mm-page_alloc-vmstat-simplify-refresh_cpu_vm_stats-change-detection.patch @@ -0,0 +1,278 @@ +From stable+bounces-210730-greg=kroah.com@vger.kernel.org Wed Jan 21 12:48:06 2026 +From: Sasha Levin +Date: Wed, 21 Jan 2026 06:28:06 -0500 +Subject: mm/page_alloc/vmstat: simplify refresh_cpu_vm_stats change detection +To: stable@vger.kernel.org +Cc: Joshua Hahn , Vlastimil Babka , SeongJae Park , Brendan Jackman , Chris Mason , Johannes Weiner , "Kirill A. Shutemov" , Michal Hocko , Suren Baghdasaryan , Zi Yan , Andrew Morton , Sasha Levin +Message-ID: <20260121112808.1461983-1-sashal@kernel.org> + +From: Joshua Hahn + +[ Upstream commit 0acc67c4030c39f39ac90413cc5d0abddd3a9527 ] + +Patch series "mm/page_alloc: Batch callers of free_pcppages_bulk", v5. + +Motivation & Approach +===================== + +While testing workloads with high sustained memory pressure on large +machines in the Meta fleet (1Tb memory, 316 CPUs), we saw an unexpectedly +high number of softlockups. Further investigation showed that the zone +lock in free_pcppages_bulk was being held for a long time, and was called +to free 2k+ pages over 100 times just during boot. + +This causes starvation in other processes for the zone lock, which can +lead to the system stalling as multiple threads cannot make progress +without the locks. We can see these issues manifesting as warnings: + +[ 4512.591979] rcu: INFO: rcu_sched self-detected stall on CPU +[ 4512.604370] rcu: 20-....: (9312 ticks this GP) idle=a654/1/0x4000000000000000 softirq=309340/309344 fqs=5426 +[ 4512.626401] rcu: hardirqs softirqs csw/system +[ 4512.638793] rcu: number: 0 145 0 +[ 4512.651177] rcu: cputime: 30 10410 174 ==> 10558(ms) +[ 4512.666657] rcu: (t=21077 jiffies g=783665 q=1242213 ncpus=316) + +While these warnings don't indicate a crash or a kernel panic, they do +point to the underlying issue of lock contention. To prevent starvation +in both locks, batch the freeing of pages using pcp->batch. + +Because free_pcppages_bulk is called with the pcp lock and acquires the +zone lock, relinquishing and reacquiring the locks are only effective when +both of them are broken together (unless the system was built with queued +spinlocks). Thus, instead of modifying free_pcppages_bulk to break both +locks, batch the freeing from its callers instead. + +A similar fix has been implemented in the Meta fleet, and we have seen +significantly less softlockups. + +Testing +======= +The following are a few synthetic benchmarks, made on three machines. The +first is a large machine with 754GiB memory and 316 processors. +The second is a relatively smaller machine with 251GiB memory and 176 +processors. The third and final is the smallest of the three, which has 62GiB +memory and 36 processors. + +On all machines, I kick off a kernel build with -j$(nproc). +Negative delta is better (faster compilation). 
+ +Large machine (754GiB memory, 316 processors) +make -j$(nproc) ++------------+---------------+-----------+ +| Metric (s) | Variation (%) | Delta(%) | ++------------+---------------+-----------+ +| real | 0.8070 | - 1.4865 | +| user | 0.2823 | + 0.4081 | +| sys | 5.0267 | -11.8737 | ++------------+---------------+-----------+ + +Medium machine (251GiB memory, 176 processors) +make -j$(nproc) ++------------+---------------+----------+ +| Metric (s) | Variation (%) | Delta(%) | ++------------+---------------+----------+ +| real | 0.2806 | +0.0351 | +| user | 0.0994 | +0.3170 | +| sys | 0.6229 | -0.6277 | ++------------+---------------+----------+ + +Small machine (62GiB memory, 36 processors) +make -j$(nproc) ++------------+---------------+----------+ +| Metric (s) | Variation (%) | Delta(%) | ++------------+---------------+----------+ +| real | 0.1503 | -2.6585 | +| user | 0.0431 | -2.2984 | +| sys | 0.1870 | -3.2013 | ++------------+---------------+----------+ + +Here, variation is the coefficient of variation, i.e. standard deviation +/ mean. + +Based on these results, it seems like there are varying degrees to how +much lock contention this reduces. For the largest and smallest machines +that I ran the tests on, it seems like there is quite some significant +reduction. There is also some performance increases visible from +userspace. + +Interestingly, the performance gains don't scale with the size of the +machine, but rather there seems to be a dip in the gain there is for the +medium-sized machine. One possible theory is that because the high +watermark depends on both memory and the number of local CPUs, what +impacts zone contention the most is not these individual values, but +rather the ratio of mem:processors. + +This patch (of 5): + +Currently, refresh_cpu_vm_stats returns an int, indicating how many +changes were made during its updates. Using this information, callers +like vmstat_update can heuristically determine if more work will be done +in the future. + +However, all of refresh_cpu_vm_stats's callers either (a) ignore the +result, only caring about performing the updates, or (b) only care about +whether changes were made, but not *how many* changes were made. + +Simplify the code by returning a bool instead to indicate if updates +were made. + +In addition, simplify fold_diff and decay_pcp_high to return a bool +for the same reason. + +Link: https://lkml.kernel.org/r/20251014145011.3427205-1-joshua.hahnjy@gmail.com +Link: https://lkml.kernel.org/r/20251014145011.3427205-2-joshua.hahnjy@gmail.com +Signed-off-by: Joshua Hahn +Reviewed-by: Vlastimil Babka +Reviewed-by: SeongJae Park +Cc: Brendan Jackman +Cc: Chris Mason +Cc: Johannes Weiner +Cc: "Kirill A. 
Shutemov" +Cc: Michal Hocko +Cc: Suren Baghdasaryan +Cc: Zi Yan +Signed-off-by: Andrew Morton +Stable-dep-of: 038a102535eb ("mm/page_alloc: prevent pcp corruption with SMP=n") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/gfp.h | 2 +- + mm/page_alloc.c | 8 ++++---- + mm/vmstat.c | 28 +++++++++++++++------------- + 3 files changed, 20 insertions(+), 18 deletions(-) + +--- a/include/linux/gfp.h ++++ b/include/linux/gfp.h +@@ -397,7 +397,7 @@ extern void page_frag_free(void *addr); + #define free_page(addr) free_pages((addr), 0) + + void page_alloc_init_cpuhp(void); +-int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp); ++bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp); + void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp); + void drain_all_pages(struct zone *zone); + void drain_local_pages(struct zone *zone); +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -2363,10 +2363,10 @@ static int rmqueue_bulk(struct zone *zon + * Called from the vmstat counter updater to decay the PCP high. + * Return whether there are addition works to do. + */ +-int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp) ++bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp) + { + int high_min, to_drain, batch; +- int todo = 0; ++ bool todo = false; + + high_min = READ_ONCE(pcp->high_min); + batch = READ_ONCE(pcp->batch); +@@ -2379,7 +2379,7 @@ int decay_pcp_high(struct zone *zone, st + pcp->high = max3(pcp->count - (batch << CONFIG_PCP_BATCH_SCALE_MAX), + pcp->high - (pcp->high >> 3), high_min); + if (pcp->high > high_min) +- todo++; ++ todo = true; + } + + to_drain = pcp->count - pcp->high; +@@ -2387,7 +2387,7 @@ int decay_pcp_high(struct zone *zone, st + spin_lock(&pcp->lock); + free_pcppages_bulk(zone, to_drain, pcp, 0); + spin_unlock(&pcp->lock); +- todo++; ++ todo = true; + } + + return todo; +--- a/mm/vmstat.c ++++ b/mm/vmstat.c +@@ -768,25 +768,25 @@ EXPORT_SYMBOL(dec_node_page_state); + + /* + * Fold a differential into the global counters. +- * Returns the number of counters updated. ++ * Returns whether counters were updated. + */ + static int fold_diff(int *zone_diff, int *node_diff) + { + int i; +- int changes = 0; ++ bool changed = false; + + for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) + if (zone_diff[i]) { + atomic_long_add(zone_diff[i], &vm_zone_stat[i]); +- changes++; ++ changed = true; + } + + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) + if (node_diff[i]) { + atomic_long_add(node_diff[i], &vm_node_stat[i]); +- changes++; ++ changed = true; + } +- return changes; ++ return changed; + } + + /* +@@ -803,16 +803,16 @@ static int fold_diff(int *zone_diff, int + * with the global counters. These could cause remote node cache line + * bouncing and will have to be only done when necessary. + * +- * The function returns the number of global counters updated. ++ * The function returns whether global counters were updated. 
+ */ +-static int refresh_cpu_vm_stats(bool do_pagesets) ++static bool refresh_cpu_vm_stats(bool do_pagesets) + { + struct pglist_data *pgdat; + struct zone *zone; + int i; + int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; + int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; +- int changes = 0; ++ bool changed = false; + + for_each_populated_zone(zone) { + struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; +@@ -836,7 +836,8 @@ static int refresh_cpu_vm_stats(bool do_ + if (do_pagesets) { + cond_resched(); + +- changes += decay_pcp_high(zone, this_cpu_ptr(pcp)); ++ if (decay_pcp_high(zone, this_cpu_ptr(pcp))) ++ changed = true; + #ifdef CONFIG_NUMA + /* + * Deal with draining the remote pageset of this +@@ -858,13 +859,13 @@ static int refresh_cpu_vm_stats(bool do_ + } + + if (__this_cpu_dec_return(pcp->expire)) { +- changes++; ++ changed = true; + continue; + } + + if (__this_cpu_read(pcp->count)) { + drain_zone_pages(zone, this_cpu_ptr(pcp)); +- changes++; ++ changed = true; + } + #endif + } +@@ -884,8 +885,9 @@ static int refresh_cpu_vm_stats(bool do_ + } + } + +- changes += fold_diff(global_zone_diff, global_node_diff); +- return changes; ++ if (fold_diff(global_zone_diff, global_node_diff)) ++ changed = true; ++ return changed; + } + + /* diff --git a/queue-6.12/phy-phy-rockchip-inno-usb2-use-dev_err_probe-in-the-probe-path.patch b/queue-6.12/phy-phy-rockchip-inno-usb2-use-dev_err_probe-in-the-probe-path.patch new file mode 100644 index 0000000000..f749ef3cb7 --- /dev/null +++ b/queue-6.12/phy-phy-rockchip-inno-usb2-use-dev_err_probe-in-the-probe-path.patch @@ -0,0 +1,90 @@ +From stable+bounces-210636-greg=kroah.com@vger.kernel.org Wed Jan 21 02:39:35 2026 +From: Sasha Levin +Date: Tue, 20 Jan 2026 20:38:55 -0500 +Subject: phy: phy-rockchip-inno-usb2: Use dev_err_probe() in the probe path +To: stable@vger.kernel.org +Cc: Dragan Simic , Heiko Stuebner , Vinod Koul , Sasha Levin +Message-ID: <20260121013856.1104103-1-sashal@kernel.org> + +From: Dragan Simic + +[ Upstream commit 40452520850683f6771094ca218ff206d1fcb022 ] + +Improve error handling in the probe path by using function dev_err_probe() +instead of function dev_err(), where appropriate. 
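The shape of each conversion in the hunks below is the usual dev_err_probe() pattern: dev_err_probe() returns the error it is passed and does not print an error for -EPROBE_DEFER (it records the deferral reason instead), which is why the explicit -EPROBE_DEFER check in the extcon path can be dropped. Sketch, with the message text as a placeholder:

	/* before */
	if (ret) {
		dev_err(dev, "failed to ...\n");
		return ret;
	}

	/* after */
	if (ret)
		return dev_err_probe(dev, ret, "failed to ...\n");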
+ +Signed-off-by: Dragan Simic +Reviewed-by: Heiko Stuebner +Link: https://lore.kernel.org/r/d4ccd9fc278fb46ea868406bf77811ee507f0e4e.1725524803.git.dsimic@manjaro.org +Signed-off-by: Vinod Koul +Stable-dep-of: e07dea3de508 ("phy: rockchip: inno-usb2: Fix a double free bug in rockchip_usb2phy_probe()") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/phy/rockchip/phy-rockchip-inno-usb2.c | 27 +++++++++----------------- + 1 file changed, 10 insertions(+), 17 deletions(-) + +--- a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c ++++ b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c +@@ -424,11 +424,9 @@ static int rockchip_usb2phy_extcon_regis + + if (of_property_read_bool(node, "extcon")) { + edev = extcon_get_edev_by_phandle(rphy->dev, 0); +- if (IS_ERR(edev)) { +- if (PTR_ERR(edev) != -EPROBE_DEFER) +- dev_err(rphy->dev, "Invalid or missing extcon\n"); +- return PTR_ERR(edev); +- } ++ if (IS_ERR(edev)) ++ return dev_err_probe(rphy->dev, PTR_ERR(edev), ++ "invalid or missing extcon\n"); + } else { + /* Initialize extcon device */ + edev = devm_extcon_dev_allocate(rphy->dev, +@@ -438,10 +436,9 @@ static int rockchip_usb2phy_extcon_regis + return -ENOMEM; + + ret = devm_extcon_dev_register(rphy->dev, edev); +- if (ret) { +- dev_err(rphy->dev, "failed to register extcon device\n"); +- return ret; +- } ++ if (ret) ++ return dev_err_probe(rphy->dev, ret, ++ "failed to register extcon device\n"); + } + + rphy->edev = edev; +@@ -1417,10 +1414,8 @@ static int rockchip_usb2phy_probe(struct + } + + ret = rockchip_usb2phy_clk480m_register(rphy); +- if (ret) { +- dev_err(dev, "failed to register 480m output clock\n"); +- return ret; +- } ++ if (ret) ++ return dev_err_probe(dev, ret, "failed to register 480m output clock\n"); + + if (rphy->phy_cfg->phy_tuning) { + ret = rphy->phy_cfg->phy_tuning(rphy); +@@ -1440,8 +1435,7 @@ static int rockchip_usb2phy_probe(struct + + phy = devm_phy_create(dev, child_np, &rockchip_usb2phy_ops); + if (IS_ERR(phy)) { +- dev_err_probe(dev, PTR_ERR(phy), "failed to create phy\n"); +- ret = PTR_ERR(phy); ++ ret = dev_err_probe(dev, PTR_ERR(phy), "failed to create phy\n"); + goto put_child; + } + +@@ -1478,8 +1472,7 @@ next_child: + "rockchip_usb2phy", + rphy); + if (ret) { +- dev_err(rphy->dev, +- "failed to request usb2phy irq handle\n"); ++ dev_err_probe(rphy->dev, ret, "failed to request usb2phy irq handle\n"); + goto put_child; + } + } diff --git a/queue-6.12/phy-rockchip-inno-usb2-fix-a-double-free-bug-in-rockchip_usb2phy_probe.patch b/queue-6.12/phy-rockchip-inno-usb2-fix-a-double-free-bug-in-rockchip_usb2phy_probe.patch new file mode 100644 index 0000000000..6a93e00de6 --- /dev/null +++ b/queue-6.12/phy-rockchip-inno-usb2-fix-a-double-free-bug-in-rockchip_usb2phy_probe.patch @@ -0,0 +1,43 @@ +From stable+bounces-210635-greg=kroah.com@vger.kernel.org Wed Jan 21 02:39:30 2026 +From: Sasha Levin +Date: Tue, 20 Jan 2026 20:38:56 -0500 +Subject: phy: rockchip: inno-usb2: Fix a double free bug in rockchip_usb2phy_probe() +To: stable@vger.kernel.org +Cc: Wentao Liang , Neil Armstrong , Vinod Koul , Sasha Levin +Message-ID: <20260121013856.1104103-2-sashal@kernel.org> + +From: Wentao Liang + +[ Upstream commit e07dea3de508cd6950c937cec42de7603190e1ca ] + +The for_each_available_child_of_node() calls of_node_put() to +release child_np in each success loop. After breaking from the +loop with the child_np has been released, the code will jump to +the put_child label and will call the of_node_put() again if the +devm_request_threaded_irq() fails. 
These cause a double free bug. + +Fix by returning directly to avoid the duplicate of_node_put(). + +Fixes: ed2b5a8e6b98 ("phy: phy-rockchip-inno-usb2: support muxed interrupts") +Cc: stable@vger.kernel.org +Signed-off-by: Wentao Liang +Reviewed-by: Neil Armstrong +Link: https://patch.msgid.link/20260109154626.2452034-1-vulab@iscas.ac.cn +Signed-off-by: Vinod Koul +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/phy/rockchip/phy-rockchip-inno-usb2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c ++++ b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c +@@ -1473,7 +1473,7 @@ next_child: + rphy); + if (ret) { + dev_err_probe(rphy->dev, ret, "failed to request usb2phy irq handle\n"); +- goto put_child; ++ return ret; + } + } + diff --git a/queue-6.12/selftests-bpf-test-invalid-narrower-ctx-load.patch b/queue-6.12/selftests-bpf-test-invalid-narrower-ctx-load.patch new file mode 100644 index 0000000000..e319490763 --- /dev/null +++ b/queue-6.12/selftests-bpf-test-invalid-narrower-ctx-load.patch @@ -0,0 +1,60 @@ +From ba578b87fe2beef95b37264f8a98c0b505b93de9 Mon Sep 17 00:00:00 2001 +From: Paul Chaignon +Date: Tue, 22 Jul 2025 16:33:37 +0200 +Subject: selftests/bpf: Test invalid narrower ctx load + +From: Paul Chaignon + +commit ba578b87fe2beef95b37264f8a98c0b505b93de9 upstream. + +This patch adds selftests to cover invalid narrower loads on the +context. These used to cause kernel warnings before the previous patch. +To trigger the warning, the load had to be aligned, to read an affected +context field (ex., skb->sk), and not starting at the beginning of the +field. + +The nine new cases all fail without the previous patch. + +Suggested-by: Eduard Zingerman +Signed-off-by: Paul Chaignon +Signed-off-by: Martin KaFai Lau +Acked-by: Eduard Zingerman +Link: https://patch.msgid.link/44cd83ea9c6868079943f0a436c6efa850528cc1.1753194596.git.paul.chaignon@gmail.com +Signed-off-by: Shung-Hsi Yu +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/bpf/progs/verifier_ctx.c | 25 +++++++++++++++++++++++ + 1 file changed, 25 insertions(+) + +--- a/tools/testing/selftests/bpf/progs/verifier_ctx.c ++++ b/tools/testing/selftests/bpf/progs/verifier_ctx.c +@@ -218,4 +218,29 @@ __naked void null_check_8_null_bind(void + : __clobber_all); + } + ++#define narrow_load(type, ctx, field) \ ++ SEC(type) \ ++ __description("narrow load on field " #field " of " #ctx) \ ++ __failure __msg("invalid bpf_context access") \ ++ __naked void invalid_narrow_load##ctx##field(void) \ ++ { \ ++ asm volatile (" \ ++ r1 = *(u32 *)(r1 + %[off]); \ ++ r0 = 0; \ ++ exit;" \ ++ : \ ++ : __imm_const(off, offsetof(struct ctx, field) + 4) \ ++ : __clobber_all); \ ++ } ++ ++narrow_load("cgroup/getsockopt", bpf_sockopt, sk); ++narrow_load("cgroup/getsockopt", bpf_sockopt, optval); ++narrow_load("cgroup/getsockopt", bpf_sockopt, optval_end); ++narrow_load("tc", __sk_buff, sk); ++narrow_load("cgroup/bind4", bpf_sock_addr, sk); ++narrow_load("sockops", bpf_sock_ops, sk); ++narrow_load("sockops", bpf_sock_ops, skb_data); ++narrow_load("sockops", bpf_sock_ops, skb_data_end); ++narrow_load("sockops", bpf_sock_ops, skb_hwtstamp); ++ + char _license[] SEC("license") = "GPL"; diff --git a/queue-6.12/series b/queue-6.12/series index 50a359ae1e..31e75cf16b 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -123,3 +123,16 @@ hid-intel-ish-hid-use-dedicated-unbound-workqueues-to-prevent-resume-blocking.pa 
hid-intel-ish-hid-fix-wcast-function-type-strict-in-devm_ishtp_alloc_workqueue.patch btrfs-fix-deadlock-in-wait_current_trans-due-to-ignored-transaction-type.patch xfs-set-max_agbno-to-allow-sparse-alloc-of-last-full-inode-chunk.patch +mm-damon-sysfs-scheme-cleanup-quotas-subdirs-on-scheme-dir-setup-failure.patch +mm-damon-sysfs-scheme-cleanup-access_pattern-subdirs-on-scheme-dir-setup-failure.patch +bpf-reject-narrower-access-to-pointer-ctx-fields.patch +selftests-bpf-test-invalid-narrower-ctx-load.patch +mm-kmsan-fix-poisoning-of-high-order-non-compound-pages.patch +mm-fake-numa-allow-later-numa-node-hotplug.patch +mm-numa-memblock-include-asm-numa.h-for-numa_nodes_parsed.patch +phy-phy-rockchip-inno-usb2-use-dev_err_probe-in-the-probe-path.patch +phy-rockchip-inno-usb2-fix-a-double-free-bug-in-rockchip_usb2phy_probe.patch +dmaengine-fsl-edma-fix-clk-leak-on-alloc_chan_resources-failure.patch +mm-page_alloc-vmstat-simplify-refresh_cpu_vm_stats-change-detection.patch +mm-page_alloc-batch-page-freeing-in-decay_pcp_high.patch +mm-page_alloc-prevent-pcp-corruption-with-smp-n.patch