From b7e2ffff8af4a61985e1eaba6d101b9d7c3e67b1 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 17 Nov 2025 10:14:15 -0500 Subject: [PATCH] Fixes for all trees Signed-off-by: Sasha Levin --- ...omap-argument-to-dax_iomap_sector-as.patch | 34 +++ ...curve25519-disable-on-cpu_big_endian.patch | 47 +++ ...lag-dropping-behavior-in-ksm_madvise.patch | 107 +++++++ queue-5.10/series | 3 + ...curve25519-disable-on-cpu_big_endian.patch | 47 +++ ...lag-dropping-behavior-in-ksm_madvise.patch | 107 +++++++ ...n-kernel-c-flag-handle-late-add_addr.patch | 87 ++++++ queue-5.15/series | 3 + ...lag-dropping-behavior-in-ksm_madvise.patch | 107 +++++++ queue-5.4/series | 1 + ...nc-fix-double-free-in-hci_discovery_.patch | 119 ++++++++ queue-6.1/espintcp-fix-skb-leaks.patch | 75 +++++ ...curve25519-disable-on-cpu_big_endian.patch | 47 +++ ...te-pmd_none_or_clear_bad_unless_tran.patch | 286 ++++++++++++++++++ ...long-for-page-accountings-and-retval.patch | 232 ++++++++++++++ queue-6.1/series | 6 + ...-trace-points-for-tasklet-entry-exit.patch | 151 +++++++++ queue-6.6/espintcp-fix-skb-leaks.patch | 75 +++++ ...curve25519-disable-on-cpu_big_endian.patch | 47 +++ ...head-cache-usage-with-large-max_skb_.patch | 148 +++++++++ .../net-dsa-improve-shutdown-sequence.patch | 123 ++++++++ ...pointer-dereference-in-l3mdev_l3_rcv.patch | 67 ++++ queue-6.6/series | 5 + 23 files changed, 1924 insertions(+) create mode 100644 queue-5.10/fsdax-mark-the-iomap-argument-to-dax_iomap_sector-as.patch create mode 100644 queue-5.10/lib-crypto-arm-curve25519-disable-on-cpu_big_endian.patch create mode 100644 queue-5.10/mm-ksm-fix-flag-dropping-behavior-in-ksm_madvise.patch create mode 100644 queue-5.15/lib-crypto-arm-curve25519-disable-on-cpu_big_endian.patch create mode 100644 queue-5.15/mm-ksm-fix-flag-dropping-behavior-in-ksm_madvise.patch create mode 100644 queue-5.15/mptcp-pm-in-kernel-c-flag-handle-late-add_addr.patch create mode 100644 queue-5.4/mm-ksm-fix-flag-dropping-behavior-in-ksm_madvise.patch create mode 100644 queue-6.1/bluetooth-hci_sync-fix-double-free-in-hci_discovery_.patch create mode 100644 queue-6.1/espintcp-fix-skb-leaks.patch create mode 100644 queue-6.1/lib-crypto-arm-curve25519-disable-on-cpu_big_endian.patch create mode 100644 queue-6.1/mm-mprotect-delete-pmd_none_or_clear_bad_unless_tran.patch create mode 100644 queue-6.1/mm-mprotect-use-long-for-page-accountings-and-retval.patch create mode 100644 queue-6.1/softirq-add-trace-points-for-tasklet-entry-exit.patch create mode 100644 queue-6.6/espintcp-fix-skb-leaks.patch create mode 100644 queue-6.6/lib-crypto-arm-curve25519-disable-on-cpu_big_endian.patch create mode 100644 queue-6.6/net-allow-small-head-cache-usage-with-large-max_skb_.patch create mode 100644 queue-6.6/net-dsa-improve-shutdown-sequence.patch create mode 100644 queue-6.6/net-fix-null-pointer-dereference-in-l3mdev_l3_rcv.patch diff --git a/queue-5.10/fsdax-mark-the-iomap-argument-to-dax_iomap_sector-as.patch b/queue-5.10/fsdax-mark-the-iomap-argument-to-dax_iomap_sector-as.patch new file mode 100644 index 0000000000..d1e4324df9 --- /dev/null +++ b/queue-5.10/fsdax-mark-the-iomap-argument-to-dax_iomap_sector-as.patch @@ -0,0 +1,34 @@ +From 9a311073c085f20d56532bc2a191360c9707088a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Nov 2025 11:47:03 +0000 +Subject: fsdax: mark the iomap argument to dax_iomap_sector as const + +From: Christoph Hellwig + +[ Upstream commit 7e4f4b2d689d959b03cb07dfbdb97b9696cb1076 ] + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. 
Wong +Signed-off-by: Darrick J. Wong +Signed-off-by: Eliav Farber +Signed-off-by: Sasha Levin +--- + fs/dax.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/dax.c b/fs/dax.c +index 91820b9b50b73..2ca33ef5d519d 100644 +--- a/fs/dax.c ++++ b/fs/dax.c +@@ -1006,7 +1006,7 @@ int dax_writeback_mapping_range(struct address_space *mapping, + } + EXPORT_SYMBOL_GPL(dax_writeback_mapping_range); + +-static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos) ++static sector_t dax_iomap_sector(const struct iomap *iomap, loff_t pos) + { + return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9; + } +-- +2.51.0 + diff --git a/queue-5.10/lib-crypto-arm-curve25519-disable-on-cpu_big_endian.patch b/queue-5.10/lib-crypto-arm-curve25519-disable-on-cpu_big_endian.patch new file mode 100644 index 0000000000..fa7afc0c2b --- /dev/null +++ b/queue-5.10/lib-crypto-arm-curve25519-disable-on-cpu_big_endian.patch @@ -0,0 +1,47 @@ +From 33dceddcac99fd9e2d2fde970b2f6e649a2258f0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Nov 2025 12:29:49 -0800 +Subject: lib/crypto: arm/curve25519: Disable on CPU_BIG_ENDIAN + +From: Eric Biggers + +commit 44e8241c51f762aafa50ed116da68fd6ecdcc954 upstream. + +On big endian arm kernels, the arm optimized Curve25519 code produces +incorrect outputs and fails the Curve25519 test. This has been true +ever since this code was added. + +It seems that hardly anyone (or even no one?) actually uses big endian +arm kernels. But as long as they're ostensibly supported, we should +disable this code on them so that it's not accidentally used. + +Note: for future-proofing, use !CPU_BIG_ENDIAN instead of +CPU_LITTLE_ENDIAN. Both of these are arch-specific options that could +get removed in the future if big endian support gets dropped. + +Fixes: d8f1308a025f ("crypto: arm/curve25519 - wire up NEON implementation") +Cc: stable@vger.kernel.org +Acked-by: Ard Biesheuvel +Link: https://lore.kernel.org/r/20251104054906.716914-1-ebiggers@kernel.org +Signed-off-by: Eric Biggers +Signed-off-by: Sasha Levin +--- + arch/arm/crypto/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig +index c46c05548080a..c5d676e7f16be 100644 +--- a/arch/arm/crypto/Kconfig ++++ b/arch/arm/crypto/Kconfig +@@ -147,7 +147,7 @@ config CRYPTO_NHPOLY1305_NEON + + config CRYPTO_CURVE25519_NEON + tristate "NEON accelerated Curve25519 scalar multiplication library" +- depends on KERNEL_MODE_NEON ++ depends on KERNEL_MODE_NEON && !CPU_BIG_ENDIAN + select CRYPTO_LIB_CURVE25519_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CURVE25519 + +-- +2.51.0 + diff --git a/queue-5.10/mm-ksm-fix-flag-dropping-behavior-in-ksm_madvise.patch b/queue-5.10/mm-ksm-fix-flag-dropping-behavior-in-ksm_madvise.patch new file mode 100644 index 0000000000..655b07e24b --- /dev/null +++ b/queue-5.10/mm-ksm-fix-flag-dropping-behavior-in-ksm_madvise.patch @@ -0,0 +1,107 @@ +From 3b28436b716e1d9780050336924151b717d50473 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Nov 2025 12:19:05 +0000 +Subject: mm/ksm: fix flag-dropping behavior in ksm_madvise + +From: Jakub Acs + +[ Upstream commit f04aad36a07cc17b7a5d5b9a2d386ce6fae63e93 ] + +syzkaller discovered the following crash: (kernel BUG) + +[ 44.607039] ------------[ cut here ]------------ +[ 44.607422] kernel BUG at mm/userfaultfd.c:2067! 
+[ 44.608148] Oops: invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN NOPTI +[ 44.608814] CPU: 1 UID: 0 PID: 2475 Comm: reproducer Not tainted 6.16.0-rc6 #1 PREEMPT(none) +[ 44.609635] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 +[ 44.610695] RIP: 0010:userfaultfd_release_all+0x3a8/0x460 + + + +[ 44.617726] Call Trace: +[ 44.617926] +[ 44.619284] userfaultfd_release+0xef/0x1b0 +[ 44.620976] __fput+0x3f9/0xb60 +[ 44.621240] fput_close_sync+0x110/0x210 +[ 44.622222] __x64_sys_close+0x8f/0x120 +[ 44.622530] do_syscall_64+0x5b/0x2f0 +[ 44.622840] entry_SYSCALL_64_after_hwframe+0x76/0x7e +[ 44.623244] RIP: 0033:0x7f365bb3f227 + +Kernel panics because it detects UFFD inconsistency during +userfaultfd_release_all(). Specifically, a VMA which has a valid pointer +to vma->vm_userfaultfd_ctx, but no UFFD flags in vma->vm_flags. + +The inconsistency is caused in ksm_madvise(): when user calls madvise() +with MADV_UNMEARGEABLE on a VMA that is registered for UFFD in MINOR mode, +it accidentally clears all flags stored in the upper 32 bits of +vma->vm_flags. + +Assuming x86_64 kernel build, unsigned long is 64-bit and unsigned int and +int are 32-bit wide. This setup causes the following mishap during the &= +~VM_MERGEABLE assignment. + +VM_MERGEABLE is a 32-bit constant of type unsigned int, 0x8000'0000. +After ~ is applied, it becomes 0x7fff'ffff unsigned int, which is then +promoted to unsigned long before the & operation. This promotion fills +upper 32 bits with leading 0s, as we're doing unsigned conversion (and +even for a signed conversion, this wouldn't help as the leading bit is 0). +& operation thus ends up AND-ing vm_flags with 0x0000'0000'7fff'ffff +instead of intended 0xffff'ffff'7fff'ffff and hence accidentally clears +the upper 32-bits of its value. + +Fix it by changing `VM_MERGEABLE` constant to unsigned long, using the +BIT() macro. + +Note: other VM_* flags are not affected: This only happens to the +VM_MERGEABLE flag, as the other VM_* flags are all constants of type int +and after ~ operation, they end up with leading 1 and are thus converted +to unsigned long with leading 1s. + +Note 2: +After commit 31defc3b01d9 ("userfaultfd: remove (VM_)BUG_ON()s"), this is +no longer a kernel BUG, but a WARNING at the same place: + +[ 45.595973] WARNING: CPU: 1 PID: 2474 at mm/userfaultfd.c:2067 + +but the root-cause (flag-drop) remains the same. 
+ +[akpm@linux-foundation.org: rust bindgen wasn't able to handle BIT(), from Miguel] + Link: https://lore.kernel.org/oe-kbuild-all/202510030449.VfSaAjvd-lkp@intel.com/ +Link: https://lkml.kernel.org/r/20251001090353.57523-2-acsjakub@amazon.de +Fixes: 7677f7fd8be7 ("userfaultfd: add minor fault registration mode") +Signed-off-by: Jakub Acs +Signed-off-by: Miguel Ojeda +Acked-by: David Hildenbrand +Acked-by: SeongJae Park +Tested-by: Alice Ryhl +Tested-by: Miguel Ojeda +Cc: Xu Xin +Cc: Chengming Zhou +Cc: Peter Xu +Cc: Axel Rasmussen +Cc: +Signed-off-by: Andrew Morton +[ acsjakub: drop rust-compatibility change (no rust in 5.10) ] +Signed-off-by: Jakub Acs +Signed-off-by: Sasha Levin +--- + include/linux/mm.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/linux/mm.h b/include/linux/mm.h +index e168d87d6f2ee..4787d39bbad4a 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -296,7 +296,7 @@ extern unsigned int kobjsize(const void *objp); + #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ + #define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */ + #define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */ +-#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ ++#define VM_MERGEABLE BIT(31) /* KSM may merge identical pages */ + + #ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS + #define VM_HIGH_ARCH_BIT_0 32 /* bit only usable on 64-bit architectures */ +-- +2.51.0 + diff --git a/queue-5.10/series b/queue-5.10/series index 49e53beda0..5862e0e41a 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -197,3 +197,6 @@ regulator-fixed-fix-gpio-descriptor-leak-on-register.patch asoc-cs4271-fix-regulator-leak-on-probe-failure.patch drm-vmwgfx-validate-command-header-size-against-svga.patch alsa-usb-audio-fix-null-pointer-dereference-in-snd_u.patch +fsdax-mark-the-iomap-argument-to-dax_iomap_sector-as.patch +mm-ksm-fix-flag-dropping-behavior-in-ksm_madvise.patch +lib-crypto-arm-curve25519-disable-on-cpu_big_endian.patch diff --git a/queue-5.15/lib-crypto-arm-curve25519-disable-on-cpu_big_endian.patch b/queue-5.15/lib-crypto-arm-curve25519-disable-on-cpu_big_endian.patch new file mode 100644 index 0000000000..07d129a04f --- /dev/null +++ b/queue-5.15/lib-crypto-arm-curve25519-disable-on-cpu_big_endian.patch @@ -0,0 +1,47 @@ +From 265ea9af3ca2151d96dd634ee8e991e5e27c7739 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Nov 2025 12:29:46 -0800 +Subject: lib/crypto: arm/curve25519: Disable on CPU_BIG_ENDIAN + +From: Eric Biggers + +commit 44e8241c51f762aafa50ed116da68fd6ecdcc954 upstream. + +On big endian arm kernels, the arm optimized Curve25519 code produces +incorrect outputs and fails the Curve25519 test. This has been true +ever since this code was added. + +It seems that hardly anyone (or even no one?) actually uses big endian +arm kernels. But as long as they're ostensibly supported, we should +disable this code on them so that it's not accidentally used. + +Note: for future-proofing, use !CPU_BIG_ENDIAN instead of +CPU_LITTLE_ENDIAN. Both of these are arch-specific options that could +get removed in the future if big endian support gets dropped. 
+ +Fixes: d8f1308a025f ("crypto: arm/curve25519 - wire up NEON implementation") +Cc: stable@vger.kernel.org +Acked-by: Ard Biesheuvel +Link: https://lore.kernel.org/r/20251104054906.716914-1-ebiggers@kernel.org +Signed-off-by: Eric Biggers +Signed-off-by: Sasha Levin +--- + arch/arm/crypto/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig +index 149a5bd6b88c1..d3d318df0e389 100644 +--- a/arch/arm/crypto/Kconfig ++++ b/arch/arm/crypto/Kconfig +@@ -166,7 +166,7 @@ config CRYPTO_NHPOLY1305_NEON + + config CRYPTO_CURVE25519_NEON + tristate "NEON accelerated Curve25519 scalar multiplication library" +- depends on KERNEL_MODE_NEON ++ depends on KERNEL_MODE_NEON && !CPU_BIG_ENDIAN + select CRYPTO_LIB_CURVE25519_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CURVE25519 + +-- +2.51.0 + diff --git a/queue-5.15/mm-ksm-fix-flag-dropping-behavior-in-ksm_madvise.patch b/queue-5.15/mm-ksm-fix-flag-dropping-behavior-in-ksm_madvise.patch new file mode 100644 index 0000000000..84dad4511f --- /dev/null +++ b/queue-5.15/mm-ksm-fix-flag-dropping-behavior-in-ksm_madvise.patch @@ -0,0 +1,107 @@ +From 60eddf9b3ec34caa389dc073e6a337f92c6aa7a7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Nov 2025 12:17:55 +0000 +Subject: mm/ksm: fix flag-dropping behavior in ksm_madvise + +From: Jakub Acs + +[ Upstream commit f04aad36a07cc17b7a5d5b9a2d386ce6fae63e93 ] + +syzkaller discovered the following crash: (kernel BUG) + +[ 44.607039] ------------[ cut here ]------------ +[ 44.607422] kernel BUG at mm/userfaultfd.c:2067! +[ 44.608148] Oops: invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN NOPTI +[ 44.608814] CPU: 1 UID: 0 PID: 2475 Comm: reproducer Not tainted 6.16.0-rc6 #1 PREEMPT(none) +[ 44.609635] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 +[ 44.610695] RIP: 0010:userfaultfd_release_all+0x3a8/0x460 + + + +[ 44.617726] Call Trace: +[ 44.617926] +[ 44.619284] userfaultfd_release+0xef/0x1b0 +[ 44.620976] __fput+0x3f9/0xb60 +[ 44.621240] fput_close_sync+0x110/0x210 +[ 44.622222] __x64_sys_close+0x8f/0x120 +[ 44.622530] do_syscall_64+0x5b/0x2f0 +[ 44.622840] entry_SYSCALL_64_after_hwframe+0x76/0x7e +[ 44.623244] RIP: 0033:0x7f365bb3f227 + +Kernel panics because it detects UFFD inconsistency during +userfaultfd_release_all(). Specifically, a VMA which has a valid pointer +to vma->vm_userfaultfd_ctx, but no UFFD flags in vma->vm_flags. + +The inconsistency is caused in ksm_madvise(): when user calls madvise() +with MADV_UNMEARGEABLE on a VMA that is registered for UFFD in MINOR mode, +it accidentally clears all flags stored in the upper 32 bits of +vma->vm_flags. + +Assuming x86_64 kernel build, unsigned long is 64-bit and unsigned int and +int are 32-bit wide. This setup causes the following mishap during the &= +~VM_MERGEABLE assignment. + +VM_MERGEABLE is a 32-bit constant of type unsigned int, 0x8000'0000. +After ~ is applied, it becomes 0x7fff'ffff unsigned int, which is then +promoted to unsigned long before the & operation. This promotion fills +upper 32 bits with leading 0s, as we're doing unsigned conversion (and +even for a signed conversion, this wouldn't help as the leading bit is 0). +& operation thus ends up AND-ing vm_flags with 0x0000'0000'7fff'ffff +instead of intended 0xffff'ffff'7fff'ffff and hence accidentally clears +the upper 32-bits of its value. + +Fix it by changing `VM_MERGEABLE` constant to unsigned long, using the +BIT() macro. 
+ +Note: other VM_* flags are not affected: This only happens to the +VM_MERGEABLE flag, as the other VM_* flags are all constants of type int +and after ~ operation, they end up with leading 1 and are thus converted +to unsigned long with leading 1s. + +Note 2: +After commit 31defc3b01d9 ("userfaultfd: remove (VM_)BUG_ON()s"), this is +no longer a kernel BUG, but a WARNING at the same place: + +[ 45.595973] WARNING: CPU: 1 PID: 2474 at mm/userfaultfd.c:2067 + +but the root-cause (flag-drop) remains the same. + +[akpm@linux-foundation.org: rust bindgen wasn't able to handle BIT(), from Miguel] + Link: https://lore.kernel.org/oe-kbuild-all/202510030449.VfSaAjvd-lkp@intel.com/ +Link: https://lkml.kernel.org/r/20251001090353.57523-2-acsjakub@amazon.de +Fixes: 7677f7fd8be7 ("userfaultfd: add minor fault registration mode") +Signed-off-by: Jakub Acs +Signed-off-by: Miguel Ojeda +Acked-by: David Hildenbrand +Acked-by: SeongJae Park +Tested-by: Alice Ryhl +Tested-by: Miguel Ojeda +Cc: Xu Xin +Cc: Chengming Zhou +Cc: Peter Xu +Cc: Axel Rasmussen +Cc: +Signed-off-by: Andrew Morton +[ acsjakub: drop rust-compatibility change (no rust in 5.15) ] +Signed-off-by: Jakub Acs +Signed-off-by: Sasha Levin +--- + include/linux/mm.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 3598925561b13..071dd864a7b2b 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -310,7 +310,7 @@ extern unsigned int kobjsize(const void *objp); + #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ + #define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */ + #define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */ +-#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ ++#define VM_MERGEABLE BIT(31) /* KSM may merge identical pages */ + + #ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS + #define VM_HIGH_ARCH_BIT_0 32 /* bit only usable on 64-bit architectures */ +-- +2.51.0 + diff --git a/queue-5.15/mptcp-pm-in-kernel-c-flag-handle-late-add_addr.patch b/queue-5.15/mptcp-pm-in-kernel-c-flag-handle-late-add_addr.patch new file mode 100644 index 0000000000..950a839677 --- /dev/null +++ b/queue-5.15/mptcp-pm-in-kernel-c-flag-handle-late-add_addr.patch @@ -0,0 +1,87 @@ +From a2957c7f2307eb4c9d81ee29432372ccc41cf5d3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Nov 2025 16:22:57 +0100 +Subject: mptcp: pm: in-kernel: C-flag: handle late ADD_ADDR + +From: Matthieu Baerts (NGI0) + +[ Upstream commit e84cb860ac3ce67ec6ecc364433fd5b412c448bc ] + +The special C-flag case expects the ADD_ADDR to be received when +switching to 'fully-established'. But for various reasons, the ADD_ADDR +could be sent after the "4th ACK", and the special case doesn't work. + +On NIPA, the new test validating this special case for the C-flag failed +a few times, e.g. + + 102 default limits, server deny join id 0 + syn rx [FAIL] got 0 JOIN[s] syn rx expected 2 + + Server ns stats + (...) + MPTcpExtAddAddrTx 1 + MPTcpExtEchoAdd 1 + + Client ns stats + (...) + MPTcpExtAddAddr 1 + MPTcpExtEchoAddTx 1 + + synack rx [FAIL] got 0 JOIN[s] synack rx expected 2 + ack rx [FAIL] got 0 JOIN[s] ack rx expected 2 + join Rx [FAIL] see above + syn tx [FAIL] got 0 JOIN[s] syn tx expected 2 + join Tx [FAIL] see above + +I had a suspicion about what the issue could be: the ADD_ADDR might have +been received after the switch to the 'fully-established' state. The +issue was not easy to reproduce. 
The packet capture shown that the +ADD_ADDR can indeed be sent with a delay, and the client would not try +to establish subflows to it as expected. + +A simple fix is not to mark the endpoints as 'used' in the C-flag case, +when looking at creating subflows to the remote initial IP address and +port. In this case, there is no need to try. + +Note: newly added fullmesh endpoints will still continue to be used as +expected, thanks to the conditions behind mptcp_pm_add_addr_c_flag_case. + +Fixes: 4b1ff850e0c1 ("mptcp: pm: in-kernel: usable client side with C-flag") +Cc: stable@vger.kernel.org +Reviewed-by: Geliang Tang +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20251020-net-mptcp-c-flag-late-add-addr-v1-1-8207030cb0e8@kernel.org +Signed-off-by: Jakub Kicinski +[ applied to pm_netlink.c instead of pm_kernel.c ] +Signed-off-by: Sasha Levin +[ I took the version from Sasha from v6.1, and fixed an additional + conflict in pm_netlink.c, because commit a88c9e496937 ("mptcp: do not + block subflows creation on errors") is not in this version and changed + the code around: check_work_pending() is now called directly, followed + by a return instead of a goto. ] +Signed-off-by: Matthieu Baerts (NGI0) +Signed-off-by: Sasha Levin +--- + net/mptcp/pm_netlink.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c +index df46ca14ce234..e94b78a8b0ef1 100644 +--- a/net/mptcp/pm_netlink.c ++++ b/net/mptcp/pm_netlink.c +@@ -532,6 +532,12 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) + check_work_pending(msk); + } + ++ /* No need to try establishing subflows to remote id0 if not allowed */ ++ if (mptcp_pm_add_addr_c_flag_case(msk)) { ++ check_work_pending(msk); ++ return; ++ } ++ + /* check if should create a new subflow */ + if (msk->pm.local_addr_used < local_addr_max && + msk->pm.subflows < subflows_max && +-- +2.51.0 + diff --git a/queue-5.15/series b/queue-5.15/series index 624b697841..e2b7baeb83 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -263,3 +263,6 @@ drm-vmwgfx-validate-command-header-size-against-svga.patch nfsv4-fix-an-incorrect-parameter-when-calling-nfs4_c.patch alsa-usb-audio-fix-null-pointer-dereference-in-snd_u.patch bpf-add-bpf_prog_run_data_pointers.patch +mptcp-pm-in-kernel-c-flag-handle-late-add_addr.patch +mm-ksm-fix-flag-dropping-behavior-in-ksm_madvise.patch +lib-crypto-arm-curve25519-disable-on-cpu_big_endian.patch diff --git a/queue-5.4/mm-ksm-fix-flag-dropping-behavior-in-ksm_madvise.patch b/queue-5.4/mm-ksm-fix-flag-dropping-behavior-in-ksm_madvise.patch new file mode 100644 index 0000000000..89f024f945 --- /dev/null +++ b/queue-5.4/mm-ksm-fix-flag-dropping-behavior-in-ksm_madvise.patch @@ -0,0 +1,107 @@ +From fc8e02a25b0ff34e6bef0cd7c5d1707221df033f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Nov 2025 12:19:20 +0000 +Subject: mm/ksm: fix flag-dropping behavior in ksm_madvise + +From: Jakub Acs + +[ Upstream commit f04aad36a07cc17b7a5d5b9a2d386ce6fae63e93 ] + +syzkaller discovered the following crash: (kernel BUG) + +[ 44.607039] ------------[ cut here ]------------ +[ 44.607422] kernel BUG at mm/userfaultfd.c:2067! 
+[ 44.608148] Oops: invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN NOPTI +[ 44.608814] CPU: 1 UID: 0 PID: 2475 Comm: reproducer Not tainted 6.16.0-rc6 #1 PREEMPT(none) +[ 44.609635] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 +[ 44.610695] RIP: 0010:userfaultfd_release_all+0x3a8/0x460 + + + +[ 44.617726] Call Trace: +[ 44.617926] +[ 44.619284] userfaultfd_release+0xef/0x1b0 +[ 44.620976] __fput+0x3f9/0xb60 +[ 44.621240] fput_close_sync+0x110/0x210 +[ 44.622222] __x64_sys_close+0x8f/0x120 +[ 44.622530] do_syscall_64+0x5b/0x2f0 +[ 44.622840] entry_SYSCALL_64_after_hwframe+0x76/0x7e +[ 44.623244] RIP: 0033:0x7f365bb3f227 + +Kernel panics because it detects UFFD inconsistency during +userfaultfd_release_all(). Specifically, a VMA which has a valid pointer +to vma->vm_userfaultfd_ctx, but no UFFD flags in vma->vm_flags. + +The inconsistency is caused in ksm_madvise(): when user calls madvise() +with MADV_UNMEARGEABLE on a VMA that is registered for UFFD in MINOR mode, +it accidentally clears all flags stored in the upper 32 bits of +vma->vm_flags. + +Assuming x86_64 kernel build, unsigned long is 64-bit and unsigned int and +int are 32-bit wide. This setup causes the following mishap during the &= +~VM_MERGEABLE assignment. + +VM_MERGEABLE is a 32-bit constant of type unsigned int, 0x8000'0000. +After ~ is applied, it becomes 0x7fff'ffff unsigned int, which is then +promoted to unsigned long before the & operation. This promotion fills +upper 32 bits with leading 0s, as we're doing unsigned conversion (and +even for a signed conversion, this wouldn't help as the leading bit is 0). +& operation thus ends up AND-ing vm_flags with 0x0000'0000'7fff'ffff +instead of intended 0xffff'ffff'7fff'ffff and hence accidentally clears +the upper 32-bits of its value. + +Fix it by changing `VM_MERGEABLE` constant to unsigned long, using the +BIT() macro. + +Note: other VM_* flags are not affected: This only happens to the +VM_MERGEABLE flag, as the other VM_* flags are all constants of type int +and after ~ operation, they end up with leading 1 and are thus converted +to unsigned long with leading 1s. + +Note 2: +After commit 31defc3b01d9 ("userfaultfd: remove (VM_)BUG_ON()s"), this is +no longer a kernel BUG, but a WARNING at the same place: + +[ 45.595973] WARNING: CPU: 1 PID: 2474 at mm/userfaultfd.c:2067 + +but the root-cause (flag-drop) remains the same. 
+ +[akpm@linux-foundation.org: rust bindgen wasn't able to handle BIT(), from Miguel] + Link: https://lore.kernel.org/oe-kbuild-all/202510030449.VfSaAjvd-lkp@intel.com/ +Link: https://lkml.kernel.org/r/20251001090353.57523-2-acsjakub@amazon.de +Fixes: 7677f7fd8be7 ("userfaultfd: add minor fault registration mode") +Signed-off-by: Jakub Acs +Signed-off-by: Miguel Ojeda +Acked-by: David Hildenbrand +Acked-by: SeongJae Park +Tested-by: Alice Ryhl +Tested-by: Miguel Ojeda +Cc: Xu Xin +Cc: Chengming Zhou +Cc: Peter Xu +Cc: Axel Rasmussen +Cc: +Signed-off-by: Andrew Morton +[ acsjakub: drop rust-compatibility change (no rust in 5.4) ] +Signed-off-by: Jakub Acs +Signed-off-by: Sasha Levin +--- + include/linux/mm.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 57cba6e4fdcd7..be8c793233d39 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -293,7 +293,7 @@ extern unsigned int kobjsize(const void *objp); + #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ + #define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */ + #define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */ +-#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ ++#define VM_MERGEABLE BIT(31) /* KSM may merge identical pages */ + + #ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS + #define VM_HIGH_ARCH_BIT_0 32 /* bit only usable on 64-bit architectures */ +-- +2.51.0 + diff --git a/queue-5.4/series b/queue-5.4/series index ab8cc8ffec..d5a5f47928 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -148,3 +148,4 @@ regulator-fixed-fix-gpio-descriptor-leak-on-register.patch asoc-cs4271-fix-regulator-leak-on-probe-failure.patch drm-vmwgfx-validate-command-header-size-against-svga.patch alsa-usb-audio-fix-null-pointer-dereference-in-snd_u.patch +mm-ksm-fix-flag-dropping-behavior-in-ksm_madvise.patch diff --git a/queue-6.1/bluetooth-hci_sync-fix-double-free-in-hci_discovery_.patch b/queue-6.1/bluetooth-hci_sync-fix-double-free-in-hci_discovery_.patch new file mode 100644 index 0000000000..245e3a277f --- /dev/null +++ b/queue-6.1/bluetooth-hci_sync-fix-double-free-in-hci_discovery_.patch @@ -0,0 +1,119 @@ +From d44d208a60ab53ad4c2936949ca23427fe5a2d24 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Nov 2025 10:41:28 +0800 +Subject: Bluetooth: hci_sync: fix double free in + 'hci_discovery_filter_clear()' + +From: Arseniy Krasnov + +[ Upstream commit 2935e556850e9c94d7a00adf14d3cd7fe406ac03 ] + +Function 'hci_discovery_filter_clear()' frees 'uuids' array and then +sets it to NULL. There is a tiny chance of the following race: + +'hci_cmd_sync_work()' + + 'update_passive_scan_sync()' + + 'hci_update_passive_scan_sync()' + + 'hci_discovery_filter_clear()' + kfree(uuids); + + <-------------------------preempted--------------------------------> + 'start_service_discovery()' + + 'hci_discovery_filter_clear()' + kfree(uuids); // DOUBLE FREE + + <-------------------------preempted--------------------------------> + + uuids = NULL; + +To fix it let's add locking around 'kfree()' call and NULL pointer +assignment. Otherwise the following backtrace fires: + +[ ] ------------[ cut here ]------------ +[ ] kernel BUG at mm/slub.c:547! 
+[ ] Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP +[ ] CPU: 3 UID: 0 PID: 246 Comm: bluetoothd Tainted: G O 6.12.19-kernel #1 +[ ] Tainted: [O]=OOT_MODULE +[ ] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +[ ] pc : __slab_free+0xf8/0x348 +[ ] lr : __slab_free+0x48/0x348 +... +[ ] Call trace: +[ ] __slab_free+0xf8/0x348 +[ ] kfree+0x164/0x27c +[ ] start_service_discovery+0x1d0/0x2c0 +[ ] hci_sock_sendmsg+0x518/0x924 +[ ] __sock_sendmsg+0x54/0x60 +[ ] sock_write_iter+0x98/0xf8 +[ ] do_iter_readv_writev+0xe4/0x1c8 +[ ] vfs_writev+0x128/0x2b0 +[ ] do_writev+0xfc/0x118 +[ ] __arm64_sys_writev+0x20/0x2c +[ ] invoke_syscall+0x68/0xf0 +[ ] el0_svc_common.constprop.0+0x40/0xe0 +[ ] do_el0_svc+0x1c/0x28 +[ ] el0_svc+0x30/0xd0 +[ ] el0t_64_sync_handler+0x100/0x12c +[ ] el0t_64_sync+0x194/0x198 +[ ] Code: 8b0002e6 eb17031f 54fffbe1 d503201f (d4210000) +[ ] ---[ end trace 0000000000000000 ]--- + +Fixes: ad383c2c65a5 ("Bluetooth: hci_sync: Enable advertising when LL privacy is enabled") +Signed-off-by: Arseniy Krasnov +Signed-off-by: Luiz Augusto von Dentz +[ Minor context change fixed. ] +Signed-off-by: Alva Lan +Signed-off-by: Sasha Levin +--- + include/net/bluetooth/hci_core.h | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h +index 4a1faf11785f4..b0a7ceb99eec0 100644 +--- a/include/net/bluetooth/hci_core.h ++++ b/include/net/bluetooth/hci_core.h +@@ -28,7 +28,7 @@ + #include + #include + #include +- ++#include + #include + #include + #include +@@ -92,6 +92,7 @@ struct discovery_state { + unsigned long scan_start; + unsigned long scan_duration; + unsigned long name_resolve_timeout; ++ spinlock_t lock; + }; + + #define SUSPEND_NOTIFIER_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ +@@ -881,6 +882,7 @@ static inline void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, + + static inline void discovery_init(struct hci_dev *hdev) + { ++ spin_lock_init(&hdev->discovery.lock); + hdev->discovery.state = DISCOVERY_STOPPED; + INIT_LIST_HEAD(&hdev->discovery.all); + INIT_LIST_HEAD(&hdev->discovery.unknown); +@@ -895,8 +897,12 @@ static inline void hci_discovery_filter_clear(struct hci_dev *hdev) + hdev->discovery.report_invalid_rssi = true; + hdev->discovery.rssi = HCI_RSSI_INVALID; + hdev->discovery.uuid_count = 0; ++ ++ spin_lock(&hdev->discovery.lock); + kfree(hdev->discovery.uuids); + hdev->discovery.uuids = NULL; ++ spin_unlock(&hdev->discovery.lock); ++ + hdev->discovery.scan_start = 0; + hdev->discovery.scan_duration = 0; + } +-- +2.51.0 + diff --git a/queue-6.1/espintcp-fix-skb-leaks.patch b/queue-6.1/espintcp-fix-skb-leaks.patch new file mode 100644 index 0000000000..a68257c8f9 --- /dev/null +++ b/queue-6.1/espintcp-fix-skb-leaks.patch @@ -0,0 +1,75 @@ +From 6bca5c61a1dfdf7690d40503feb63f414c472d18 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 9 Nov 2025 20:39:15 +0800 +Subject: espintcp: fix skb leaks + +From: Sabrina Dubroca + +[ Upstream commit 63c1f19a3be3169e51a5812d22a6d0c879414076 ] + +A few error paths are missing a kfree_skb. + +Fixes: e27cca96cd68 ("xfrm: add espintcp (RFC 8229)") +Signed-off-by: Sabrina Dubroca +Reviewed-by: Simon Horman +Signed-off-by: Steffen Klassert +[ Minor context change fixed. 
] +Signed-off-by: Ruohan Lan +Signed-off-by: Sasha Levin +--- + net/ipv4/esp4.c | 4 +++- + net/ipv6/esp6.c | 4 +++- + net/xfrm/espintcp.c | 4 +++- + 3 files changed, 9 insertions(+), 3 deletions(-) + +diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c +index 8f5417ff355d7..a40f78a6474c6 100644 +--- a/net/ipv4/esp4.c ++++ b/net/ipv4/esp4.c +@@ -152,8 +152,10 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) + + sk = esp_find_tcp_sk(x); + err = PTR_ERR_OR_ZERO(sk); +- if (err) ++ if (err) { ++ kfree_skb(skb); + goto out; ++ } + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) +diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c +index 085a83b807afd..48963fc9057bc 100644 +--- a/net/ipv6/esp6.c ++++ b/net/ipv6/esp6.c +@@ -169,8 +169,10 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) + + sk = esp6_find_tcp_sk(x); + err = PTR_ERR_OR_ZERO(sk); +- if (err) ++ if (err) { ++ kfree_skb(skb); + goto out; ++ } + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) +diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c +index d6fece1ed982d..b26fbaead7a55 100644 +--- a/net/xfrm/espintcp.c ++++ b/net/xfrm/espintcp.c +@@ -168,8 +168,10 @@ int espintcp_queue_out(struct sock *sk, struct sk_buff *skb) + { + struct espintcp_ctx *ctx = espintcp_getctx(sk); + +- if (skb_queue_len(&ctx->out_queue) >= READ_ONCE(netdev_max_backlog)) ++ if (skb_queue_len(&ctx->out_queue) >= READ_ONCE(netdev_max_backlog)) { ++ kfree_skb(skb); + return -ENOBUFS; ++ } + + __skb_queue_tail(&ctx->out_queue, skb); + +-- +2.51.0 + diff --git a/queue-6.1/lib-crypto-arm-curve25519-disable-on-cpu_big_endian.patch b/queue-6.1/lib-crypto-arm-curve25519-disable-on-cpu_big_endian.patch new file mode 100644 index 0000000000..0cebbacbc2 --- /dev/null +++ b/queue-6.1/lib-crypto-arm-curve25519-disable-on-cpu_big_endian.patch @@ -0,0 +1,47 @@ +From c8902bbfd25942d87031a1915d64585d2d60c0b6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Nov 2025 12:29:41 -0800 +Subject: lib/crypto: arm/curve25519: Disable on CPU_BIG_ENDIAN + +From: Eric Biggers + +commit 44e8241c51f762aafa50ed116da68fd6ecdcc954 upstream. + +On big endian arm kernels, the arm optimized Curve25519 code produces +incorrect outputs and fails the Curve25519 test. This has been true +ever since this code was added. + +It seems that hardly anyone (or even no one?) actually uses big endian +arm kernels. But as long as they're ostensibly supported, we should +disable this code on them so that it's not accidentally used. + +Note: for future-proofing, use !CPU_BIG_ENDIAN instead of +CPU_LITTLE_ENDIAN. Both of these are arch-specific options that could +get removed in the future if big endian support gets dropped. 
+ +Fixes: d8f1308a025f ("crypto: arm/curve25519 - wire up NEON implementation") +Cc: stable@vger.kernel.org +Acked-by: Ard Biesheuvel +Link: https://lore.kernel.org/r/20251104054906.716914-1-ebiggers@kernel.org +Signed-off-by: Eric Biggers +Signed-off-by: Sasha Levin +--- + arch/arm/crypto/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig +index 3858c4d4cb988..f6323b84631ff 100644 +--- a/arch/arm/crypto/Kconfig ++++ b/arch/arm/crypto/Kconfig +@@ -4,7 +4,7 @@ menu "Accelerated Cryptographic Algorithms for CPU (arm)" + + config CRYPTO_CURVE25519_NEON + tristate "Public key crypto: Curve25519 (NEON)" +- depends on KERNEL_MODE_NEON ++ depends on KERNEL_MODE_NEON && !CPU_BIG_ENDIAN + select CRYPTO_LIB_CURVE25519_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CURVE25519 + help +-- +2.51.0 + diff --git a/queue-6.1/mm-mprotect-delete-pmd_none_or_clear_bad_unless_tran.patch b/queue-6.1/mm-mprotect-delete-pmd_none_or_clear_bad_unless_tran.patch new file mode 100644 index 0000000000..940befad71 --- /dev/null +++ b/queue-6.1/mm-mprotect-delete-pmd_none_or_clear_bad_unless_tran.patch @@ -0,0 +1,286 @@ +From 073b10bc6319a3d2b446c2091849b2c7d3d454f9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Nov 2025 16:11:01 +0900 +Subject: mm/mprotect: delete pmd_none_or_clear_bad_unless_trans_huge() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Hugh Dickins + +commit 670ddd8cdcbd1d07a4571266ae3517f821728c3a upstream. + +change_pmd_range() had special pmd_none_or_clear_bad_unless_trans_huge(), +required to avoid "bad" choices when setting automatic NUMA hinting under +mmap_read_lock(); but most of that is already covered in pte_offset_map() +now. change_pmd_range() just wants a pmd_none() check before wasting time +on MMU notifiers, then checks on the read-once _pmd value to work out +what's needed for huge cases. If change_pte_range() returns -EAGAIN to +retry if pte_offset_map_lock() fails, nothing more special is needed. + +Link: https://lkml.kernel.org/r/725a42a9-91e9-c868-925-e3a5fd40bb4f@google.com +Signed-off-by: Hugh Dickins +Cc: Alistair Popple +Cc: Anshuman Khandual +Cc: Axel Rasmussen +Cc: Christophe Leroy +Cc: Christoph Hellwig +Cc: David Hildenbrand +Cc: "Huang, Ying" +Cc: Ira Weiny +Cc: Jason Gunthorpe +Cc: Kirill A. 
Shutemov +Cc: Lorenzo Stoakes +Cc: Matthew Wilcox +Cc: Mel Gorman +Cc: Miaohe Lin +Cc: Mike Kravetz +Cc: Mike Rapoport (IBM) +Cc: Minchan Kim +Cc: Naoya Horiguchi +Cc: Pavel Tatashin +Cc: Peter Xu +Cc: Peter Zijlstra +Cc: Qi Zheng +Cc: Ralph Campbell +Cc: Ryan Roberts +Cc: SeongJae Park +Cc: Song Liu +Cc: Steven Price +Cc: Suren Baghdasaryan +Cc: Thomas Hellström +Cc: Will Deacon +Cc: Yang Shi +Cc: Yu Zhao +Cc: Zack Rusin +Signed-off-by: Andrew Morton +[ Background: It was reported that a bad pmd is seen when automatic NUMA + balancing is marking page table entries as prot_numa: + + [2437548.196018] mm/pgtable-generic.c:50: bad pmd 00000000af22fc02(dffffffe71fbfe02) + [2437548.235022] Call Trace: + [2437548.238234] + [2437548.241060] dump_stack_lvl+0x46/0x61 + [2437548.245689] panic+0x106/0x2e5 + [2437548.249497] pmd_clear_bad+0x3c/0x3c + [2437548.253967] change_pmd_range.isra.0+0x34d/0x3a7 + [2437548.259537] change_p4d_range+0x156/0x20e + [2437548.264392] change_protection_range+0x116/0x1a9 + [2437548.269976] change_prot_numa+0x15/0x37 + [2437548.274774] task_numa_work+0x1b8/0x302 + [2437548.279512] task_work_run+0x62/0x95 + [2437548.283882] exit_to_user_mode_loop+0x1a4/0x1a9 + [2437548.289277] exit_to_user_mode_prepare+0xf4/0xfc + [2437548.294751] ? sysvec_apic_timer_interrupt+0x34/0x81 + [2437548.300677] irqentry_exit_to_user_mode+0x5/0x25 + [2437548.306153] asm_sysvec_apic_timer_interrupt+0x16/0x1b + + This is due to a race condition between change_prot_numa() and + THP migration because the kernel doesn't check is_swap_pmd() and + pmd_trans_huge() atomically: + + change_prot_numa() THP migration + ====================================================================== + - change_pmd_range() + -> is_swap_pmd() returns false, + meaning it's not a PMD migration + entry. + - do_huge_pmd_numa_page() + -> migrate_misplaced_page() sets + migration entries for the THP. + - change_pmd_range() + -> pmd_none_or_clear_bad_unless_trans_huge() + -> pmd_none() and pmd_trans_huge() returns false + - pmd_none_or_clear_bad_unless_trans_huge() + -> pmd_bad() returns true for the migration entry! + + The upstream commit 670ddd8cdcbd ("mm/mprotect: delete + pmd_none_or_clear_bad_unless_trans_huge()") closes this race condition + by checking is_swap_pmd() and pmd_trans_huge() atomically. + + Backporting note: + Unlike the mainline, pte_offset_map_lock() does not check if the pmd + entry is a migration entry or a hugepage; acquires PTL unconditionally + instead of returning failure. Therefore, it is necessary to keep the + !is_swap_pmd() && !pmd_trans_huge() && !pmd_devmap() check before + acquiring the PTL. + + After acquiring the lock, open-code the semantics of + pte_offset_map_lock() in the mainline kernel; change_pte_range() fails + if the pmd value has changed. This requires adding pmd_old parameter + (pmd_t value that is read before calling the function) to + change_pte_range(). 
] + +Signed-off-by: Sasha Levin +--- + mm/mprotect.c | 101 +++++++++++++++++++++----------------------------- + 1 file changed, 43 insertions(+), 58 deletions(-) + +diff --git a/mm/mprotect.c b/mm/mprotect.c +index 8216f4018ee75..9381179ff8a95 100644 +--- a/mm/mprotect.c ++++ b/mm/mprotect.c +@@ -73,10 +73,12 @@ static inline bool can_change_pte_writable(struct vm_area_struct *vma, + } + + static long change_pte_range(struct mmu_gather *tlb, +- struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, +- unsigned long end, pgprot_t newprot, unsigned long cp_flags) ++ struct vm_area_struct *vma, pmd_t *pmd, pmd_t pmd_old, ++ unsigned long addr, unsigned long end, pgprot_t newprot, ++ unsigned long cp_flags) + { + pte_t *pte, oldpte; ++ pmd_t pmd_val; + spinlock_t *ptl; + long pages = 0; + int target_node = NUMA_NO_NODE; +@@ -86,21 +88,15 @@ static long change_pte_range(struct mmu_gather *tlb, + + tlb_change_page_size(tlb, PAGE_SIZE); + +- /* +- * Can be called with only the mmap_lock for reading by +- * prot_numa so we must check the pmd isn't constantly +- * changing from under us from pmd_none to pmd_trans_huge +- * and/or the other way around. +- */ +- if (pmd_trans_unstable(pmd)) +- return 0; +- +- /* +- * The pmd points to a regular pte so the pmd can't change +- * from under us even if the mmap_lock is only hold for +- * reading. +- */ + pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); ++ /* Make sure pmd didn't change after acquiring ptl */ ++ pmd_val = pmd_read_atomic(pmd); ++ /* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */ ++ barrier(); ++ if (!pmd_same(pmd_old, pmd_val)) { ++ pte_unmap_unlock(pte, ptl); ++ return -EAGAIN; ++ } + + /* Get target node for single threaded private VMAs */ + if (prot_numa && !(vma->vm_flags & VM_SHARED) && +@@ -288,31 +284,6 @@ static long change_pte_range(struct mmu_gather *tlb, + return pages; + } + +-/* +- * Used when setting automatic NUMA hinting protection where it is +- * critical that a numa hinting PMD is not confused with a bad PMD. +- */ +-static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd) +-{ +- pmd_t pmdval = pmd_read_atomic(pmd); +- +- /* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */ +-#ifdef CONFIG_TRANSPARENT_HUGEPAGE +- barrier(); +-#endif +- +- if (pmd_none(pmdval)) +- return 1; +- if (pmd_trans_huge(pmdval)) +- return 0; +- if (unlikely(pmd_bad(pmdval))) { +- pmd_clear_bad(pmd); +- return 1; +- } +- +- return 0; +-} +- + /* Return true if we're uffd wr-protecting file-backed memory, or false */ + static inline bool + uffd_wp_protect_file(struct vm_area_struct *vma, unsigned long cp_flags) +@@ -360,22 +331,34 @@ static inline long change_pmd_range(struct mmu_gather *tlb, + + pmd = pmd_offset(pud, addr); + do { +- long this_pages; +- ++ long ret; ++ pmd_t _pmd; ++again: + next = pmd_addr_end(addr, end); ++ _pmd = pmd_read_atomic(pmd); ++ /* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */ ++#ifdef CONFIG_TRANSPARENT_HUGEPAGE ++ barrier(); ++#endif + + change_pmd_prepare(vma, pmd, cp_flags); + /* + * Automatic NUMA balancing walks the tables with mmap_lock + * held for read. It's possible a parallel update to occur +- * between pmd_trans_huge() and a pmd_none_or_clear_bad() +- * check leading to a false positive and clearing. +- * Hence, it's necessary to atomically read the PMD value +- * for all the checks. ++ * between pmd_trans_huge(), is_swap_pmd(), and ++ * a pmd_none_or_clear_bad() check leading to a false positive ++ * and clearing. 
Hence, it's necessary to atomically read ++ * the PMD value for all the checks. + */ +- if (!is_swap_pmd(*pmd) && !pmd_devmap(*pmd) && +- pmd_none_or_clear_bad_unless_trans_huge(pmd)) +- goto next; ++ if (!is_swap_pmd(_pmd) && !pmd_devmap(_pmd) && !pmd_trans_huge(_pmd)) { ++ if (pmd_none(_pmd)) ++ goto next; ++ ++ if (pmd_bad(_pmd)) { ++ pmd_clear_bad(pmd); ++ goto next; ++ } ++ } + + /* invoke the mmu notifier if the pmd is populated */ + if (!range.start) { +@@ -385,7 +368,7 @@ static inline long change_pmd_range(struct mmu_gather *tlb, + mmu_notifier_invalidate_range_start(&range); + } + +- if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { ++ if (is_swap_pmd(_pmd) || pmd_trans_huge(_pmd) || pmd_devmap(_pmd)) { + if ((next - addr != HPAGE_PMD_SIZE) || + uffd_wp_protect_file(vma, cp_flags)) { + __split_huge_pmd(vma, pmd, addr, false, NULL); +@@ -400,11 +383,11 @@ static inline long change_pmd_range(struct mmu_gather *tlb, + * change_huge_pmd() does not defer TLB flushes, + * so no need to propagate the tlb argument. + */ +- int nr_ptes = change_huge_pmd(tlb, vma, pmd, +- addr, newprot, cp_flags); ++ ret = change_huge_pmd(tlb, vma, pmd, ++ addr, newprot, cp_flags); + +- if (nr_ptes) { +- if (nr_ptes == HPAGE_PMD_NR) { ++ if (ret) { ++ if (ret == HPAGE_PMD_NR) { + pages += HPAGE_PMD_NR; + nr_huge_updates++; + } +@@ -415,9 +398,11 @@ static inline long change_pmd_range(struct mmu_gather *tlb, + } + /* fall through, the trans huge pmd just split */ + } +- this_pages = change_pte_range(tlb, vma, pmd, addr, next, +- newprot, cp_flags); +- pages += this_pages; ++ ret = change_pte_range(tlb, vma, pmd, _pmd, addr, next, ++ newprot, cp_flags); ++ if (ret < 0) ++ goto again; ++ pages += ret; + next: + cond_resched(); + } while (pmd++, addr = next, addr != end); +-- +2.51.0 + diff --git a/queue-6.1/mm-mprotect-use-long-for-page-accountings-and-retval.patch b/queue-6.1/mm-mprotect-use-long-for-page-accountings-and-retval.patch new file mode 100644 index 0000000000..3a648844d8 --- /dev/null +++ b/queue-6.1/mm-mprotect-use-long-for-page-accountings-and-retval.patch @@ -0,0 +1,232 @@ +From efcd5872bd1cf004dcbfb7297189f378022f8496 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Nov 2025 16:11:00 +0900 +Subject: mm/mprotect: use long for page accountings and retval + +From: Peter Xu + +commit a79390f5d6a78647fd70856bd42b22d994de0ba2 upstream. + +Switch to use type "long" for page accountings and retval across the whole +procedure of change_protection(). + +The change should have shrinked the possible maximum page number to be +half comparing to previous (ULONG_MAX / 2), but it shouldn't overflow on +any system either because the maximum possible pages touched by change +protection should be ULONG_MAX / PAGE_SIZE. + +Two reasons to switch from "unsigned long" to "long": + + 1. It suites better on count_vm_numa_events(), whose 2nd parameter takes + a long type. + + 2. It paves way for returning negative (error) values in the future. + +Currently the only caller that consumes this retval is change_prot_numa(), +where the unsigned long was converted to an int. Since at it, touching up +the numa code to also take a long, so it'll avoid any possible overflow +too during the int-size convertion. 
+ +Link: https://lkml.kernel.org/r/20230104225207.1066932-3-peterx@redhat.com +Signed-off-by: Peter Xu +Acked-by: Mike Kravetz +Acked-by: James Houghton +Cc: Andrea Arcangeli +Cc: Axel Rasmussen +Cc: David Hildenbrand +Cc: Muchun Song +Cc: Nadav Amit +Signed-off-by: Andrew Morton +[ Adjust context ] +Signed-off-by: Harry Yoo +Signed-off-by: Sasha Levin +--- + include/linux/hugetlb.h | 4 ++-- + include/linux/mm.h | 2 +- + mm/hugetlb.c | 4 ++-- + mm/mempolicy.c | 2 +- + mm/mprotect.c | 26 +++++++++++++------------- + 5 files changed, 19 insertions(+), 19 deletions(-) + +diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h +index 26f2947c399d0..1ddc2b1f96d58 100644 +--- a/include/linux/hugetlb.h ++++ b/include/linux/hugetlb.h +@@ -233,7 +233,7 @@ void hugetlb_vma_lock_release(struct kref *kref); + + int pmd_huge(pmd_t pmd); + int pud_huge(pud_t pud); +-unsigned long hugetlb_change_protection(struct vm_area_struct *vma, ++long hugetlb_change_protection(struct vm_area_struct *vma, + unsigned long address, unsigned long end, pgprot_t newprot, + unsigned long cp_flags); + +@@ -447,7 +447,7 @@ static inline void move_hugetlb_state(struct page *oldpage, + { + } + +-static inline unsigned long hugetlb_change_protection( ++static inline long hugetlb_change_protection( + struct vm_area_struct *vma, unsigned long address, + unsigned long end, pgprot_t newprot, + unsigned long cp_flags) +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 44381ffaf34b8..f679f9007c823 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -2148,7 +2148,7 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma, + #define MM_CP_UFFD_WP_ALL (MM_CP_UFFD_WP | \ + MM_CP_UFFD_WP_RESOLVE) + +-extern unsigned long change_protection(struct mmu_gather *tlb, ++extern long change_protection(struct mmu_gather *tlb, + struct vm_area_struct *vma, unsigned long start, + unsigned long end, pgprot_t newprot, + unsigned long cp_flags); +diff --git a/mm/hugetlb.c b/mm/hugetlb.c +index 77c1ac7a05910..e7bac08071dea 100644 +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -6668,7 +6668,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, + return i ? 
i : err; + } + +-unsigned long hugetlb_change_protection(struct vm_area_struct *vma, ++long hugetlb_change_protection(struct vm_area_struct *vma, + unsigned long address, unsigned long end, + pgprot_t newprot, unsigned long cp_flags) + { +@@ -6677,7 +6677,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, + pte_t *ptep; + pte_t pte; + struct hstate *h = hstate_vma(vma); +- unsigned long pages = 0, psize = huge_page_size(h); ++ long pages = 0, psize = huge_page_size(h); + bool shared_pmd = false; + struct mmu_notifier_range range; + unsigned long last_addr_mask; +diff --git a/mm/mempolicy.c b/mm/mempolicy.c +index 399d8cb488138..97106305ce21e 100644 +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -628,7 +628,7 @@ unsigned long change_prot_numa(struct vm_area_struct *vma, + unsigned long addr, unsigned long end) + { + struct mmu_gather tlb; +- int nr_updated; ++ long nr_updated; + + tlb_gather_mmu(&tlb, vma->vm_mm); + +diff --git a/mm/mprotect.c b/mm/mprotect.c +index 668bfaa6ed2ae..8216f4018ee75 100644 +--- a/mm/mprotect.c ++++ b/mm/mprotect.c +@@ -72,13 +72,13 @@ static inline bool can_change_pte_writable(struct vm_area_struct *vma, + return true; + } + +-static unsigned long change_pte_range(struct mmu_gather *tlb, ++static long change_pte_range(struct mmu_gather *tlb, + struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, + unsigned long end, pgprot_t newprot, unsigned long cp_flags) + { + pte_t *pte, oldpte; + spinlock_t *ptl; +- unsigned long pages = 0; ++ long pages = 0; + int target_node = NUMA_NO_NODE; + bool prot_numa = cp_flags & MM_CP_PROT_NUMA; + bool uffd_wp = cp_flags & MM_CP_UFFD_WP; +@@ -346,13 +346,13 @@ uffd_wp_protect_file(struct vm_area_struct *vma, unsigned long cp_flags) + } \ + } while (0) + +-static inline unsigned long change_pmd_range(struct mmu_gather *tlb, ++static inline long change_pmd_range(struct mmu_gather *tlb, + struct vm_area_struct *vma, pud_t *pud, unsigned long addr, + unsigned long end, pgprot_t newprot, unsigned long cp_flags) + { + pmd_t *pmd; + unsigned long next; +- unsigned long pages = 0; ++ long pages = 0; + unsigned long nr_huge_updates = 0; + struct mmu_notifier_range range; + +@@ -360,7 +360,7 @@ static inline unsigned long change_pmd_range(struct mmu_gather *tlb, + + pmd = pmd_offset(pud, addr); + do { +- unsigned long this_pages; ++ long this_pages; + + next = pmd_addr_end(addr, end); + +@@ -430,13 +430,13 @@ static inline unsigned long change_pmd_range(struct mmu_gather *tlb, + return pages; + } + +-static inline unsigned long change_pud_range(struct mmu_gather *tlb, ++static inline long change_pud_range(struct mmu_gather *tlb, + struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr, + unsigned long end, pgprot_t newprot, unsigned long cp_flags) + { + pud_t *pud; + unsigned long next; +- unsigned long pages = 0; ++ long pages = 0; + + pud = pud_offset(p4d, addr); + do { +@@ -451,13 +451,13 @@ static inline unsigned long change_pud_range(struct mmu_gather *tlb, + return pages; + } + +-static inline unsigned long change_p4d_range(struct mmu_gather *tlb, ++static inline long change_p4d_range(struct mmu_gather *tlb, + struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr, + unsigned long end, pgprot_t newprot, unsigned long cp_flags) + { + p4d_t *p4d; + unsigned long next; +- unsigned long pages = 0; ++ long pages = 0; + + p4d = p4d_offset(pgd, addr); + do { +@@ -472,14 +472,14 @@ static inline unsigned long change_p4d_range(struct mmu_gather *tlb, + return pages; + } + +-static unsigned long 
change_protection_range(struct mmu_gather *tlb, ++static long change_protection_range(struct mmu_gather *tlb, + struct vm_area_struct *vma, unsigned long addr, + unsigned long end, pgprot_t newprot, unsigned long cp_flags) + { + struct mm_struct *mm = vma->vm_mm; + pgd_t *pgd; + unsigned long next; +- unsigned long pages = 0; ++ long pages = 0; + + BUG_ON(addr >= end); + pgd = pgd_offset(mm, addr); +@@ -498,12 +498,12 @@ static unsigned long change_protection_range(struct mmu_gather *tlb, + return pages; + } + +-unsigned long change_protection(struct mmu_gather *tlb, ++long change_protection(struct mmu_gather *tlb, + struct vm_area_struct *vma, unsigned long start, + unsigned long end, pgprot_t newprot, + unsigned long cp_flags) + { +- unsigned long pages; ++ long pages; + + BUG_ON((cp_flags & MM_CP_UFFD_WP_ALL) == MM_CP_UFFD_WP_ALL); + +-- +2.51.0 + diff --git a/queue-6.1/series b/queue-6.1/series index ccf85f0d25..0409adb99f 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -386,3 +386,9 @@ drm-vmwgfx-validate-command-header-size-against-svga.patch nfsv4-fix-an-incorrect-parameter-when-calling-nfs4_c.patch alsa-usb-audio-fix-null-pointer-dereference-in-snd_u.patch bpf-add-bpf_prog_run_data_pointers.patch +softirq-add-trace-points-for-tasklet-entry-exit.patch +bluetooth-hci_sync-fix-double-free-in-hci_discovery_.patch +mm-mprotect-use-long-for-page-accountings-and-retval.patch +espintcp-fix-skb-leaks.patch +mm-mprotect-delete-pmd_none_or_clear_bad_unless_tran.patch +lib-crypto-arm-curve25519-disable-on-cpu_big_endian.patch diff --git a/queue-6.1/softirq-add-trace-points-for-tasklet-entry-exit.patch b/queue-6.1/softirq-add-trace-points-for-tasklet-entry-exit.patch new file mode 100644 index 0000000000..33b895fa69 --- /dev/null +++ b/queue-6.1/softirq-add-trace-points-for-tasklet-entry-exit.patch @@ -0,0 +1,151 @@ +From 1c5da32067f63135b65ebfd90b2260484680c98b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Nov 2025 21:16:20 -0600 +Subject: softirq: Add trace points for tasklet entry/exit + +From: Sumanth Gavini + +commit f4bf3ca2e5cba655824b6e0893a98dfb33ed24e5 upstream. + +Tasklets are supposed to finish their work quickly and should not block the +current running process, but it is not guaranteed that they do so. + +Currently softirq_entry/exit can be used to analyse the total tasklets +execution time, but that's not helpful to track individual tasklets +execution time. That makes it hard to identify tasklet functions, which +take more time than expected. + +Add tasklet_entry/exit trace point support to track individual tasklet +execution. + +Trivial usage example: + # echo 1 > /sys/kernel/debug/tracing/events/irq/tasklet_entry/enable + # echo 1 > /sys/kernel/debug/tracing/events/irq/tasklet_exit/enable + # cat /sys/kernel/debug/tracing/trace + # tracer: nop + # + # entries-in-buffer/entries-written: 4/4 #P:4 + # + # _-----=> irqs-off/BH-disabled + # / _----=> need-resched + # | / _---=> hardirq/softirq + # || / _--=> preempt-depth + # ||| / _-=> migrate-disable + # |||| / delay + # TASK-PID CPU# ||||| TIMESTAMP FUNCTION + # | | | ||||| | | + -0 [003] ..s1. 314.011428: tasklet_entry: tasklet=0xffffa01ef8db2740 function=tcp_tasklet_func + -0 [003] ..s1. 314.011432: tasklet_exit: tasklet=0xffffa01ef8db2740 function=tcp_tasklet_func + -0 [003] ..s1. 314.017369: tasklet_entry: tasklet=0xffffa01ef8db2740 function=tcp_tasklet_func + -0 [003] ..s1. 
314.017371: tasklet_exit: tasklet=0xffffa01ef8db2740 function=tcp_tasklet_func + +Signed-off-by: Lingutla Chandrasekhar +Signed-off-by: J. Avila +Signed-off-by: John Stultz +Signed-off-by: Thomas Gleixner +Reviewed-by: Steven Rostedt (Google) +Link: https://lore.kernel.org/r/20230407230526.1685443-1-jstultz@google.com + +[elavila: Port to android-mainline] +[jstultz: Rebased to upstream, cut unused trace points, added + comments for the tracepoints, reworded commit] + +The intention is to keep the stable branch in sync with upstream fixes +and improve observability without introducing new functionality. + +Signed-off-by: Sumanth Gavini + +Changes in V2: +- No code changes +- Link to V1: https://lore.kernel.org/all/20250812161755.609600-1-sumanth.gavini@yahoo.com/ +- Updated the comment msg before the signed-off-by + +Signed-off-by: Sasha Levin +--- + include/trace/events/irq.h | 47 ++++++++++++++++++++++++++++++++++++++ + kernel/softirq.c | 9 ++++++-- + 2 files changed, 54 insertions(+), 2 deletions(-) + +diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h +index eeceafaaea4c1..a07b4607b6635 100644 +--- a/include/trace/events/irq.h ++++ b/include/trace/events/irq.h +@@ -160,6 +160,53 @@ DEFINE_EVENT(softirq, softirq_raise, + TP_ARGS(vec_nr) + ); + ++DECLARE_EVENT_CLASS(tasklet, ++ ++ TP_PROTO(struct tasklet_struct *t, void *func), ++ ++ TP_ARGS(t, func), ++ ++ TP_STRUCT__entry( ++ __field( void *, tasklet) ++ __field( void *, func) ++ ), ++ ++ TP_fast_assign( ++ __entry->tasklet = t; ++ __entry->func = func; ++ ), ++ ++ TP_printk("tasklet=%ps function=%ps", __entry->tasklet, __entry->func) ++); ++ ++/** ++ * tasklet_entry - called immediately before the tasklet is run ++ * @t: tasklet pointer ++ * @func: tasklet callback or function being run ++ * ++ * Used to find individual tasklet execution time ++ */ ++DEFINE_EVENT(tasklet, tasklet_entry, ++ ++ TP_PROTO(struct tasklet_struct *t, void *func), ++ ++ TP_ARGS(t, func) ++); ++ ++/** ++ * tasklet_exit - called immediately after the tasklet is run ++ * @t: tasklet pointer ++ * @func: tasklet callback or function being run ++ * ++ * Used to find individual tasklet execution time ++ */ ++DEFINE_EVENT(tasklet, tasklet_exit, ++ ++ TP_PROTO(struct tasklet_struct *t, void *func), ++ ++ TP_ARGS(t, func) ++); ++ + #endif /* _TRACE_IRQ_H */ + + /* This part must be outside protection */ +diff --git a/kernel/softirq.c b/kernel/softirq.c +index 9ab5ca399a990..fadc6bbda27b1 100644 +--- a/kernel/softirq.c ++++ b/kernel/softirq.c +@@ -822,10 +822,15 @@ static void tasklet_action_common(struct softirq_action *a, + if (tasklet_trylock(t)) { + if (!atomic_read(&t->count)) { + if (tasklet_clear_sched(t)) { +- if (t->use_callback) ++ if (t->use_callback) { ++ trace_tasklet_entry(t, t->callback); + t->callback(t); +- else ++ trace_tasklet_exit(t, t->callback); ++ } else { ++ trace_tasklet_entry(t, t->func); + t->func(t->data); ++ trace_tasklet_exit(t, t->func); ++ } + } + tasklet_unlock(t); + continue; +-- +2.51.0 + diff --git a/queue-6.6/espintcp-fix-skb-leaks.patch b/queue-6.6/espintcp-fix-skb-leaks.patch new file mode 100644 index 0000000000..183726853d --- /dev/null +++ b/queue-6.6/espintcp-fix-skb-leaks.patch @@ -0,0 +1,75 @@ +From 244529cff68a46dc68df3b2ecf2638c7dbf55f6c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Nov 2025 18:11:00 +0800 +Subject: espintcp: fix skb leaks + +From: Sabrina Dubroca + +[ Upstream commit 63c1f19a3be3169e51a5812d22a6d0c879414076 ] + +A few error paths are missing a kfree_skb. 
+ +Fixes: e27cca96cd68 ("xfrm: add espintcp (RFC 8229)") +Signed-off-by: Sabrina Dubroca +Reviewed-by: Simon Horman +Signed-off-by: Steffen Klassert +[ Minor context change fixed. ] +Signed-off-by: Ruohan Lan +Signed-off-by: Sasha Levin +--- + net/ipv4/esp4.c | 4 +++- + net/ipv6/esp6.c | 4 +++- + net/xfrm/espintcp.c | 4 +++- + 3 files changed, 9 insertions(+), 3 deletions(-) + +diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c +index 49fd664f50fc0..2caf6a2a819b2 100644 +--- a/net/ipv4/esp4.c ++++ b/net/ipv4/esp4.c +@@ -152,8 +152,10 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) + + sk = esp_find_tcp_sk(x); + err = PTR_ERR_OR_ZERO(sk); +- if (err) ++ if (err) { ++ kfree_skb(skb); + goto out; ++ } + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) +diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c +index 7e4c8628cf983..2caaab61b9967 100644 +--- a/net/ipv6/esp6.c ++++ b/net/ipv6/esp6.c +@@ -169,8 +169,10 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) + + sk = esp6_find_tcp_sk(x); + err = PTR_ERR_OR_ZERO(sk); +- if (err) ++ if (err) { ++ kfree_skb(skb); + goto out; ++ } + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) +diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c +index d3b3f9e720b3b..427072285b8c7 100644 +--- a/net/xfrm/espintcp.c ++++ b/net/xfrm/espintcp.c +@@ -169,8 +169,10 @@ int espintcp_queue_out(struct sock *sk, struct sk_buff *skb) + { + struct espintcp_ctx *ctx = espintcp_getctx(sk); + +- if (skb_queue_len(&ctx->out_queue) >= READ_ONCE(netdev_max_backlog)) ++ if (skb_queue_len(&ctx->out_queue) >= READ_ONCE(netdev_max_backlog)) { ++ kfree_skb(skb); + return -ENOBUFS; ++ } + + __skb_queue_tail(&ctx->out_queue, skb); + +-- +2.51.0 + diff --git a/queue-6.6/lib-crypto-arm-curve25519-disable-on-cpu_big_endian.patch b/queue-6.6/lib-crypto-arm-curve25519-disable-on-cpu_big_endian.patch new file mode 100644 index 0000000000..54aa419e38 --- /dev/null +++ b/queue-6.6/lib-crypto-arm-curve25519-disable-on-cpu_big_endian.patch @@ -0,0 +1,47 @@ +From 9b7f27ca0bab170d242d66aab0219b72dd7c7ad0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Nov 2025 12:29:41 -0800 +Subject: lib/crypto: arm/curve25519: Disable on CPU_BIG_ENDIAN + +From: Eric Biggers + +commit 44e8241c51f762aafa50ed116da68fd6ecdcc954 upstream. + +On big endian arm kernels, the arm optimized Curve25519 code produces +incorrect outputs and fails the Curve25519 test. This has been true +ever since this code was added. + +It seems that hardly anyone (or even no one?) actually uses big endian +arm kernels. But as long as they're ostensibly supported, we should +disable this code on them so that it's not accidentally used. + +Note: for future-proofing, use !CPU_BIG_ENDIAN instead of +CPU_LITTLE_ENDIAN. Both of these are arch-specific options that could +get removed in the future if big endian support gets dropped. 
+ +Fixes: d8f1308a025f ("crypto: arm/curve25519 - wire up NEON implementation") +Cc: stable@vger.kernel.org +Acked-by: Ard Biesheuvel +Link: https://lore.kernel.org/r/20251104054906.716914-1-ebiggers@kernel.org +Signed-off-by: Eric Biggers +Signed-off-by: Sasha Levin +--- + arch/arm/crypto/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig +index 847b7a0033569..1f684e29cff2e 100644 +--- a/arch/arm/crypto/Kconfig ++++ b/arch/arm/crypto/Kconfig +@@ -4,7 +4,7 @@ menu "Accelerated Cryptographic Algorithms for CPU (arm)" + + config CRYPTO_CURVE25519_NEON + tristate "Public key crypto: Curve25519 (NEON)" +- depends on KERNEL_MODE_NEON ++ depends on KERNEL_MODE_NEON && !CPU_BIG_ENDIAN + select CRYPTO_LIB_CURVE25519_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CURVE25519 + help +-- +2.51.0 + diff --git a/queue-6.6/net-allow-small-head-cache-usage-with-large-max_skb_.patch b/queue-6.6/net-allow-small-head-cache-usage-with-large-max_skb_.patch new file mode 100644 index 0000000000..fc90a03c89 --- /dev/null +++ b/queue-6.6/net-allow-small-head-cache-usage-with-large-max_skb_.patch @@ -0,0 +1,148 @@ +From adc98ab3ca99120b51b2bda6d7bce4a1d2359470 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Nov 2025 09:37:01 +0800 +Subject: net: allow small head cache usage with large MAX_SKB_FRAGS values + +From: Paolo Abeni + +[ Upstream commit 14ad6ed30a10afbe91b0749d6378285f4225d482 ] + +Sabrina reported the following splat: + + WARNING: CPU: 0 PID: 1 at net/core/dev.c:6935 netif_napi_add_weight_locked+0x8f2/0xba0 + Modules linked in: + CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.14.0-rc1-net-00092-g011b03359038 #996 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014 + RIP: 0010:netif_napi_add_weight_locked+0x8f2/0xba0 + Code: e8 c3 e6 6a fe 48 83 c4 28 5b 5d 41 5c 41 5d 41 5e 41 5f c3 cc cc cc cc c7 44 24 10 ff ff ff ff e9 8f fb ff ff e8 9e e6 6a fe <0f> 0b e9 d3 fe ff ff e8 92 e6 6a fe 48 8b 04 24 be ff ff ff ff 48 + RSP: 0000:ffffc9000001fc60 EFLAGS: 00010293 + RAX: 0000000000000000 RBX: ffff88806ce48128 RCX: 1ffff11001664b9e + RDX: ffff888008f00040 RSI: ffffffff8317ca42 RDI: ffff88800b325cb6 + RBP: ffff88800b325c40 R08: 0000000000000001 R09: ffffed100167502c + R10: ffff88800b3a8163 R11: 0000000000000000 R12: ffff88800ac1c168 + R13: ffff88800ac1c168 R14: ffff88800ac1c168 R15: 0000000000000007 + FS: 0000000000000000(0000) GS:ffff88806ce00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: ffff888008201000 CR3: 0000000004c94001 CR4: 0000000000370ef0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + Call Trace: + + gro_cells_init+0x1ba/0x270 + xfrm_input_init+0x4b/0x2a0 + xfrm_init+0x38/0x50 + ip_rt_init+0x2d7/0x350 + ip_init+0xf/0x20 + inet_init+0x406/0x590 + do_one_initcall+0x9d/0x2e0 + do_initcalls+0x23b/0x280 + kernel_init_freeable+0x445/0x490 + kernel_init+0x20/0x1d0 + ret_from_fork+0x46/0x80 + ret_from_fork_asm+0x1a/0x30 + + irq event stamp: 584330 + hardirqs last enabled at (584338): [] __up_console_sem+0x77/0xb0 + hardirqs last disabled at (584345): [] __up_console_sem+0x5c/0xb0 + softirqs last enabled at (583242): [] netlink_insert+0x14d/0x470 + softirqs last disabled at (583754): [] netif_napi_add_weight_locked+0x77d/0xba0 + +on kernel built with MAX_SKB_FRAGS=45, where SKB_WITH_OVERHEAD(1024) +is smaller than GRO_MAX_HEAD. 
+ +Such built additionally contains the revert of the single page frag cache +so that napi_get_frags() ends up using the page frag allocator, triggering +the splat. + +Note that the underlying issue is independent from the mentioned +revert; address it ensuring that the small head cache will fit either TCP +and GRO allocation and updating napi_alloc_skb() and __netdev_alloc_skb() +to select kmalloc() usage for any allocation fitting such cache. + +Reported-by: Sabrina Dubroca +Suggested-by: Eric Dumazet +Fixes: 3948b05950fd ("net: introduce a config option to tweak MAX_SKB_FRAGS") +Reviewed-by: Eric Dumazet +Signed-off-by: Paolo Abeni +[ Minor context change fixed. ] +Signed-off-by: Wenshan Lan +Signed-off-by: Sasha Levin +--- + include/net/gro.h | 3 +++ + net/core/gro.c | 3 --- + net/core/skbuff.c | 10 +++++++--- + 3 files changed, 10 insertions(+), 6 deletions(-) + +diff --git a/include/net/gro.h b/include/net/gro.h +index 018343254c90a..9260ed367c919 100644 +--- a/include/net/gro.h ++++ b/include/net/gro.h +@@ -10,6 +10,9 @@ + #include + #include + ++/* This should be increased if a protocol with a bigger head is added. */ ++#define GRO_MAX_HEAD (MAX_HEADER + 128) ++ + struct napi_gro_cb { + union { + struct { +diff --git a/net/core/gro.c b/net/core/gro.c +index 397cf59842503..b8cc44406e69b 100644 +--- a/net/core/gro.c ++++ b/net/core/gro.c +@@ -6,9 +6,6 @@ + + #define MAX_GRO_SKBS 8 + +-/* This should be increased if a protocol with a bigger head is added. */ +-#define GRO_MAX_HEAD (MAX_HEADER + 128) +- + static DEFINE_SPINLOCK(offload_lock); + struct list_head offload_base __read_mostly = LIST_HEAD_INIT(offload_base); + /* Maximum number of GRO_NORMAL skbs to batch up for list-RX */ +diff --git a/net/core/skbuff.c b/net/core/skbuff.c +index 867832f8bbaea..073e2c5274079 100644 +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -67,6 +67,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -96,7 +97,9 @@ static struct kmem_cache *skbuff_ext_cache __ro_after_init; + + static struct kmem_cache *skb_small_head_cache __ro_after_init; + +-#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER) ++#define GRO_MAX_HEAD_PAD (GRO_MAX_HEAD + NET_SKB_PAD + NET_IP_ALIGN) ++#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(max(MAX_TCP_HEADER, \ ++ GRO_MAX_HEAD_PAD)) + + /* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two. + * This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique +@@ -708,7 +711,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, + /* If requested length is either too small or too big, + * we use kmalloc() for skb->head allocation. 
+ */ +- if (len <= SKB_WITH_OVERHEAD(1024) || ++ if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) || + len > SKB_WITH_OVERHEAD(PAGE_SIZE) || + (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { + skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); +@@ -785,7 +788,8 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, + * When the small frag allocator is available, prefer it over kmalloc + * for small fragments + */ +- if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) || ++ if ((!NAPI_HAS_SMALL_PAGE_FRAG && ++ len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)) || + len > SKB_WITH_OVERHEAD(PAGE_SIZE) || + (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { + skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI, +-- +2.51.0 + diff --git a/queue-6.6/net-dsa-improve-shutdown-sequence.patch b/queue-6.6/net-dsa-improve-shutdown-sequence.patch new file mode 100644 index 0000000000..b79383da9d --- /dev/null +++ b/queue-6.6/net-dsa-improve-shutdown-sequence.patch @@ -0,0 +1,123 @@ +From f221e5854023b02aa9ef403eb49cc24a9beda8a1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Nov 2025 11:11:55 +0800 +Subject: net: dsa: improve shutdown sequence + +From: Vladimir Oltean + +[ Upstream commit 6c24a03a61a245fe34d47582898331fa034b6ccd ] + +Alexander Sverdlin presents 2 problems during shutdown with the +lan9303 driver. One is specific to lan9303 and the other just happens +to reproduce there. + +The first problem is that lan9303 is unique among DSA drivers in that it +calls dev_get_drvdata() at "arbitrary runtime" (not probe, not shutdown, +not remove): + +phy_state_machine() +-> ... + -> dsa_user_phy_read() + -> ds->ops->phy_read() + -> lan9303_phy_read() + -> chip->ops->phy_read() + -> lan9303_mdio_phy_read() + -> dev_get_drvdata() + +But we never stop the phy_state_machine(), so it may continue to run +after dsa_switch_shutdown(). Our common pattern in all DSA drivers is +to set drvdata to NULL to suppress the remove() method that may come +afterwards. But in this case it will result in an NPD. + +The second problem is that the way in which we set +dp->master->dsa_ptr = NULL; is concurrent with receive packet +processing. dsa_switch_rcv() checks once whether dev->dsa_ptr is NULL, +but afterwards, rather than continuing to use that non-NULL value, +dev->dsa_ptr is dereferenced again and again without NULL checks: +dsa_master_find_slave() and many other places. In between dereferences, +there is no locking to ensure that what was valid once continues to be +valid. + +Both problems have the common aspect that closing the master interface +solves them. + +In the first case, dev_close(master) triggers the NETDEV_GOING_DOWN +event in dsa_slave_netdevice_event() which closes slave ports as well. +dsa_port_disable_rt() calls phylink_stop(), which synchronously stops +the phylink state machine, and ds->ops->phy_read() will thus no longer +call into the driver after this point. + +In the second case, dev_close(master) should do this, as per +Documentation/networking/driver.rst: + +| Quiescence +| ---------- +| +| After the ndo_stop routine has been called, the hardware must +| not receive or transmit any data. All in flight packets must +| be aborted. If necessary, poll or wait for completion of +| any reset commands. + +So it should be sufficient to ensure that later, when we zeroize +master->dsa_ptr, there will be no concurrent dsa_switch_rcv() call +on this master. 
+ +The addition of the netif_device_detach() function is to ensure that +ioctls, rtnetlinks and ethtool requests on the slave ports no longer +propagate down to the driver - we're no longer prepared to handle them. + +The race condition actually did not exist when commit 0650bf52b31f +("net: dsa: be compatible with masters which unregister on shutdown") +first introduced dsa_switch_shutdown(). It was created later, when we +stopped unregistering the slave interfaces from a bad spot, and we just +replaced that sequence with a racy zeroization of master->dsa_ptr +(one which doesn't ensure that the interfaces aren't up). + +Reported-by: Alexander Sverdlin +Closes: https://lore.kernel.org/netdev/2d2e3bba17203c14a5ffdabc174e3b6bbb9ad438.camel@siemens.com/ +Closes: https://lore.kernel.org/netdev/c1bf4de54e829111e0e4a70e7bd1cf523c9550ff.camel@siemens.com/ +Fixes: ee534378f005 ("net: dsa: fix panic when DSA master device unbinds on shutdown") +Reviewed-by: Alexander Sverdlin +Tested-by: Alexander Sverdlin +Signed-off-by: Vladimir Oltean +Link: https://patch.msgid.link/20240913203549.3081071-1-vladimir.oltean@nxp.com +Signed-off-by: Paolo Abeni +[ Modification: Using dp->master and dp->slave instead of dp->conduit and dp->user ] +Signed-off-by: Rajani Kantha <681739313@139.com> +Signed-off-by: Sasha Levin +--- + net/dsa/dsa.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c +index 07736edc8b6a5..c9bf1a9a6c99b 100644 +--- a/net/dsa/dsa.c ++++ b/net/dsa/dsa.c +@@ -1613,6 +1613,7 @@ EXPORT_SYMBOL_GPL(dsa_unregister_switch); + void dsa_switch_shutdown(struct dsa_switch *ds) + { + struct net_device *master, *slave_dev; ++ LIST_HEAD(close_list); + struct dsa_port *dp; + + mutex_lock(&dsa2_mutex); +@@ -1622,10 +1623,16 @@ void dsa_switch_shutdown(struct dsa_switch *ds) + + rtnl_lock(); + ++ dsa_switch_for_each_cpu_port(dp, ds) ++ list_add(&dp->master->close_list, &close_list); ++ ++ dev_close_many(&close_list, true); ++ + dsa_switch_for_each_user_port(dp, ds) { + master = dsa_port_to_master(dp); + slave_dev = dp->slave; + ++ netif_device_detach(slave_dev); + netdev_upper_dev_unlink(master, slave_dev); + } + +-- +2.51.0 + diff --git a/queue-6.6/net-fix-null-pointer-dereference-in-l3mdev_l3_rcv.patch b/queue-6.6/net-fix-null-pointer-dereference-in-l3mdev_l3_rcv.patch new file mode 100644 index 0000000000..2897eb1b21 --- /dev/null +++ b/queue-6.6/net-fix-null-pointer-dereference-in-l3mdev_l3_rcv.patch @@ -0,0 +1,67 @@ +From 0cf29ba7f67fa11da24ab69b3d4136370fdb57ee Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Nov 2025 11:59:02 +0800 +Subject: net: fix NULL pointer dereference in l3mdev_l3_rcv + +From: Wang Liang + +[ Upstream commit 0032c99e83b9ce6d5995d65900aa4b6ffb501cce ] + +When delete l3s ipvlan: + + ip link del link eth0 ipvlan1 type ipvlan mode l3s + +This may cause a null pointer dereference: + + Call trace: + ip_rcv_finish+0x48/0xd0 + ip_rcv+0x5c/0x100 + __netif_receive_skb_one_core+0x64/0xb0 + __netif_receive_skb+0x20/0x80 + process_backlog+0xb4/0x204 + napi_poll+0xe8/0x294 + net_rx_action+0xd8/0x22c + __do_softirq+0x12c/0x354 + +This is because l3mdev_l3_rcv() visit dev->l3mdev_ops after +ipvlan_l3s_unregister() assign the dev->l3mdev_ops to NULL. 
The process +like this: + + (CPU1) | (CPU2) + l3mdev_l3_rcv() | + check dev->priv_flags: | + master = skb->dev; | + | + | ipvlan_l3s_unregister() + | set dev->priv_flags + | dev->l3mdev_ops = NULL; + | + visit master->l3mdev_ops | + +To avoid this by do not set dev->l3mdev_ops when unregister l3s ipvlan. + +Suggested-by: David Ahern +Fixes: c675e06a98a4 ("ipvlan: decouple l3s mode dependencies from other modes") +Signed-off-by: Wang Liang +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20250321090353.1170545-1-wangliang74@huawei.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Rajani Kantha <681739313@139.com> +Signed-off-by: Sasha Levin +--- + drivers/net/ipvlan/ipvlan_l3s.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/drivers/net/ipvlan/ipvlan_l3s.c b/drivers/net/ipvlan/ipvlan_l3s.c +index d5b05e8032199..ca35a50bb6405 100644 +--- a/drivers/net/ipvlan/ipvlan_l3s.c ++++ b/drivers/net/ipvlan/ipvlan_l3s.c +@@ -224,5 +224,4 @@ void ipvlan_l3s_unregister(struct ipvl_port *port) + + dev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER; + ipvlan_unregister_nf_hook(read_pnet(&port->pnet)); +- dev->l3mdev_ops = NULL; + } +-- +2.51.0 + diff --git a/queue-6.6/series b/queue-6.6/series index debc8114d7..50197de393 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -446,3 +446,8 @@ acpi-hmat-fix-lockdep-warning-for-hmem_register_reso.patch bpf-add-bpf_prog_run_data_pointers.patch bpf-account-for-current-allocated-stack-depth-in-wid.patch irqchip-riscv-intc-add-missing-free-callback-in-risc.patch +net-fix-null-pointer-dereference-in-l3mdev_l3_rcv.patch +net-allow-small-head-cache-usage-with-large-max_skb_.patch +net-dsa-improve-shutdown-sequence.patch +espintcp-fix-skb-leaks.patch +lib-crypto-arm-curve25519-disable-on-cpu_big_endian.patch -- 2.47.3
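
---

As background for the l3mdev_l3_rcv() fix queued above, the sketch below is a hypothetical userspace model (pthreads, invented names such as netdev_model, unregister_l3s and l3mdev_ops_model; it is not kernel code) of why the fix only drops IFF_L3MDEV_RX_HANDLER and leaves dev->l3mdev_ops set: the receive path checks the flag and then calls through the ops pointer with nothing stopping the unregister path from running in between, so the ops pointer has to stay valid for the lifetime of the device rather than being cleared at unregister time.

/*
 * Hypothetical userspace model of the check-then-call pattern behind
 * the ipvlan_l3s_unregister() fix. Names are invented; build with:
 *   cc -pthread race_model.c -o race_model
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct l3mdev_ops_model {
	void (*l3_rcv)(void);
};

static atomic_long handled;

static void l3_rcv_impl(void)
{
	atomic_fetch_add(&handled, 1);
}

static const struct l3mdev_ops_model ipvlan_ops = { .l3_rcv = l3_rcv_impl };

struct netdev_model {
	atomic_bool rx_handler_enabled;      /* stands in for IFF_L3MDEV_RX_HANDLER */
	const struct l3mdev_ops_model *ops;  /* stands in for dev->l3mdev_ops */
};

static struct netdev_model dev = {
	.rx_handler_enabled = true,
	.ops = &ipvlan_ops,
};

/* Receive path: flag check and indirect call, with no lock in between. */
static void *rx_thread(void *arg)
{
	(void)arg;
	for (int i = 0; i < 1000000; i++) {
		if (atomic_load(&dev.rx_handler_enabled))
			dev.ops->l3_rcv();  /* safe only because ops stays valid */
	}
	return NULL;
}

/* Unregister path, mirroring the fix: clear the flag, keep ops intact.
 * Setting dev.ops = NULL here could race with a reader that has already
 * passed its flag check but not yet made the indirect call. */
static void unregister_l3s(void)
{
	atomic_store(&dev.rx_handler_enabled, false);
}

int main(void)
{
	pthread_t rx;

	if (pthread_create(&rx, NULL, rx_thread, NULL))
		return 1;
	usleep(1000);          /* let some "packets" flow first */
	unregister_l3s();
	pthread_join(rx, NULL);
	printf("handled %ld packets, no NULL dereference\n",
	       atomic_load(&handled));
	return 0;
}

In this model the flag only gates whether new packets are handled; it never protects the ops pointer itself, which is why the queued one-line deletion in ipvlan_l3s.c (keeping dev->l3mdev_ops populated) is sufficient to close the window.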