git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 23 Jun 2025 09:14:26 +0000 (11:14 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 23 Jun 2025 09:14:26 +0000 (11:14 +0200)
added patches:
ext4-avoid-remount-errors-with-abort-mount-option.patch
ext4-make-abort-mount-option-handling-standard.patch
mm-huge_memory-fix-dereferencing-invalid-pmd-migration-entry.patch
net-fix-checksum-update-for-ila-adj-transport.patch
net_sched-sch_sfq-annotate-data-races-around-q-perturb_period.patch
net_sched-sch_sfq-don-t-allow-1-packet-limit.patch
net_sched-sch_sfq-handle-bigger-packets.patch
net_sched-sch_sfq-move-the-limit-validation.patch
net_sched-sch_sfq-reject-invalid-perturb-period.patch
net_sched-sch_sfq-use-a-temporary-work-area-for-validating-configuration.patch

12 files changed:
queue-5.15/bpf-fix-l4-csum-update-on-ipv6-in-checksum_complete.patch
queue-5.15/ext4-avoid-remount-errors-with-abort-mount-option.patch [new file with mode: 0644]
queue-5.15/ext4-make-abort-mount-option-handling-standard.patch [new file with mode: 0644]
queue-5.15/mm-huge_memory-fix-dereferencing-invalid-pmd-migration-entry.patch [new file with mode: 0644]
queue-5.15/net-fix-checksum-update-for-ila-adj-transport.patch [new file with mode: 0644]
queue-5.15/net_sched-sch_sfq-annotate-data-races-around-q-perturb_period.patch [new file with mode: 0644]
queue-5.15/net_sched-sch_sfq-don-t-allow-1-packet-limit.patch [new file with mode: 0644]
queue-5.15/net_sched-sch_sfq-handle-bigger-packets.patch [new file with mode: 0644]
queue-5.15/net_sched-sch_sfq-move-the-limit-validation.patch [new file with mode: 0644]
queue-5.15/net_sched-sch_sfq-reject-invalid-perturb-period.patch [new file with mode: 0644]
queue-5.15/net_sched-sch_sfq-use-a-temporary-work-area-for-validating-configuration.patch [new file with mode: 0644]
queue-5.15/series

diff --git a/queue-5.15/bpf-fix-l4-csum-update-on-ipv6-in-checksum_complete.patch b/queue-5.15/bpf-fix-l4-csum-update-on-ipv6-in-checksum_complete.patch
index b965b30ad006815e8007521ffe2c89aec9eca0ec..e0eebe7f6d093ac905b8c7da91a651aa59d27cca 100644 (file)
@@ -63,13 +63,11 @@ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
 Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com>
 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 ---
- include/uapi/linux/bpf.h       | 2 ++
- net/core/filter.c              | 5 +++--
- tools/include/uapi/linux/bpf.h | 2 ++
+ include/uapi/linux/bpf.h       |    2 ++
+ net/core/filter.c              |    5 +++--
+ tools/include/uapi/linux/bpf.h |    2 ++
  3 files changed, 7 insertions(+), 2 deletions(-)
 
-diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
-index 0bdeeabbc5a8..2ac62d5ed466 100644
 --- a/include/uapi/linux/bpf.h
 +++ b/include/uapi/linux/bpf.h
 @@ -1695,6 +1695,7 @@ union bpf_attr {
@@ -88,11 +86,9 @@ index 0bdeeabbc5a8..2ac62d5ed466 100644
  };
  
  /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
-diff --git a/net/core/filter.c b/net/core/filter.c
-index 65b7fb9c3d29..169d9ba4e7a0 100644
 --- a/net/core/filter.c
 +++ b/net/core/filter.c
-@@ -1951,10 +1951,11 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
+@@ -1951,10 +1951,11 @@ BPF_CALL_5(bpf_l4_csum_replace, struct s
        bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
        bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
        bool do_mforce = flags & BPF_F_MARK_ENFORCE;
@@ -105,7 +101,7 @@ index 65b7fb9c3d29..169d9ba4e7a0 100644
                return -EINVAL;
        if (unlikely(offset > 0xffff || offset & 1))
                return -EFAULT;
-@@ -1970,7 +1971,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
+@@ -1970,7 +1971,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct s
                if (unlikely(from != 0))
                        return -EINVAL;
  
@@ -114,8 +110,6 @@ index 65b7fb9c3d29..169d9ba4e7a0 100644
                break;
        case 2:
                inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
-diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
-index 54b8c899d21c..fe70f9ce8b00 100644
 --- a/tools/include/uapi/linux/bpf.h
 +++ b/tools/include/uapi/linux/bpf.h
 @@ -1695,6 +1695,7 @@ union bpf_attr {
@@ -134,6 +128,3 @@ index 54b8c899d21c..fe70f9ce8b00 100644
  };
  
  /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
--- 
-2.43.0
-
diff --git a/queue-5.15/ext4-avoid-remount-errors-with-abort-mount-option.patch b/queue-5.15/ext4-avoid-remount-errors-with-abort-mount-option.patch
new file mode 100644 (file)
index 0000000..169bc4c
--- /dev/null
@@ -0,0 +1,62 @@
+From 76486b104168ae59703190566e372badf433314b Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Sat, 5 Oct 2024 00:15:56 +0200
+Subject: ext4: avoid remount errors with 'abort' mount option
+
+From: Jan Kara <jack@suse.cz>
+
+commit 76486b104168ae59703190566e372badf433314b upstream.
+
+When we remount a filesystem with the 'abort' mount option while changing
+other mount options as well (as the LTP test does), we can return an error
+from the system call after commit d3476f3dad4a ("ext4: don't set
+SB_RDONLY after filesystem errors") because the application of mount
+option changes detects the shut-down filesystem and refuses to do anything.
+The behavior of applying other mount options in the presence of the
+'abort' mount option is currently rather arbitrary, as some mount option
+changes are handled before 'abort' and some after it.
+
+Move aborting of the filesystem to the end of remount handling so that all
+requested changes are properly applied before the filesystem is shut down,
+giving reasonably consistent behavior.
+
+Fixes: d3476f3dad4a ("ext4: don't set SB_RDONLY after filesystem errors")
+Reported-by: Jan Stancek <jstancek@redhat.com>
+Link: https://lore.kernel.org/all/Zvp6L+oFnfASaoHl@t14s
+Signed-off-by: Jan Kara <jack@suse.cz>
+Tested-by: Jan Stancek <jstancek@redhat.com>
+Link: https://patch.msgid.link/20241004221556.19222-1-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/super.c |   11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -5849,9 +5849,6 @@ static int ext4_remount(struct super_blo
+               goto restore_opts;
+       }
+-      if (test_opt2(sb, ABORT))
+-              ext4_abort(sb, ESHUTDOWN, "Abort forced by user");
+-
+       sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
+               (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
+@@ -6027,6 +6024,14 @@ static int ext4_remount(struct super_blo
+        */
+       *flags = (*flags & ~vfs_flags) | (sb->s_flags & vfs_flags);
++      /*
++       * Handle aborting the filesystem as the last thing during remount to
++       * avoid obscure errors during remount when some option changes fail to
++       * apply due to shutdown filesystem.
++       */
++      if (test_opt2(sb, ABORT))
++              ext4_abort(sb, ESHUTDOWN, "Abort forced by user");
++
+       ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s. Quota mode: %s.",
+                orig_data, ext4_quota_mode(sb));
+       kfree(orig_data);
diff --git a/queue-5.15/ext4-make-abort-mount-option-handling-standard.patch b/queue-5.15/ext4-make-abort-mount-option-handling-standard.patch
new file mode 100644 (file)
index 0000000..d24dba0
--- /dev/null
@@ -0,0 +1,66 @@
+From 22b8d707b07e6e06f50fe1d9ca8756e1f894eb0d Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Fri, 16 Jun 2023 18:50:50 +0200
+Subject: ext4: make 'abort' mount option handling standard
+
+From: Jan Kara <jack@suse.cz>
+
+commit 22b8d707b07e6e06f50fe1d9ca8756e1f894eb0d upstream.
+
+The 'abort' mount option is the only mount option that has special handling
+and sets a bit in sbi->s_mount_flags. There is no strong reason for
+that, so just simplify the code and make 'abort' set a bit in
+sbi->s_mount_opt2 like any other mount option. This simplifies the code
+and will allow us to drop EXT4_MF_FS_ABORTED completely in the following
+patch.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20230616165109.21695-4-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: 76486b104168 ("ext4: avoid remount errors with 'abort' mount option")
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/ext4.h  |    1 +
+ fs/ext4/super.c |    6 ++----
+ 2 files changed, 3 insertions(+), 4 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1255,6 +1255,7 @@ struct ext4_inode_info {
+ #define EXT4_MOUNT2_MB_OPTIMIZE_SCAN  0x00000080 /* Optimize group
+                                                   * scanning in mballoc
+                                                   */
++#define EXT4_MOUNT2_ABORT             0x00000100 /* Abort filesystem */
+ #define clear_opt(sb, opt)            EXT4_SB(sb)->s_mount_opt &= \
+                                               ~EXT4_MOUNT_##opt
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -2023,6 +2023,7 @@ static const struct mount_opts {
+        MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
+       {Opt_fc_debug_max_replay, 0, MOPT_GTE0},
+ #endif
++      {Opt_abort, EXT4_MOUNT2_ABORT, MOPT_SET | MOPT_2},
+       {Opt_err, 0, 0}
+ };
+@@ -2143,9 +2144,6 @@ static int handle_mount_opt(struct super
+       case Opt_removed:
+               ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
+               return 1;
+-      case Opt_abort:
+-              ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
+-              return 1;
+       case Opt_i_version:
+               sb->s_flags |= SB_I_VERSION;
+               return 1;
+@@ -5851,7 +5849,7 @@ static int ext4_remount(struct super_blo
+               goto restore_opts;
+       }
+-      if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
++      if (test_opt2(sb, ABORT))
+               ext4_abort(sb, ESHUTDOWN, "Abort forced by user");
+       sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
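
For readers less familiar with ext4's option bookkeeping, the short userspace sketch below models what the new MOPT_SET | MOPT_2 table entry amounts to: Opt_abort now simply sets a bit in a second option mask that later code tests with test_opt2(sb, ABORT). The struct and helper names below are simplified stand-ins, not the real ext4 definitions.

  /* Minimal userspace model of the MOPT_SET | MOPT_2 handling for Opt_abort.
   * The real code lives in fs/ext4/ext4.h and fs/ext4/super.c; the names
   * here are simplified stand-ins. */
  #include <stdio.h>

  #define EXT4_MOUNT2_ABORT 0x00000100            /* same bit as in the patch */

  struct sb_info { unsigned int s_mount_opt2; };  /* stand-in for ext4_sb_info */

  static void parse_abort_option(struct sb_info *sbi)
  {
          sbi->s_mount_opt2 |= EXT4_MOUNT2_ABORT;  /* effect of MOPT_SET | MOPT_2 */
  }

  static int abort_requested(const struct sb_info *sbi)
  {
          return !!(sbi->s_mount_opt2 & EXT4_MOUNT2_ABORT);  /* test_opt2(sb, ABORT) */
  }

  int main(void)
  {
          struct sb_info sbi = { 0 };

          parse_abort_option(&sbi);
          printf("abort requested: %d\n", abort_requested(&sbi));
          return 0;
  }
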
diff --git a/queue-5.15/mm-huge_memory-fix-dereferencing-invalid-pmd-migration-entry.patch b/queue-5.15/mm-huge_memory-fix-dereferencing-invalid-pmd-migration-entry.patch
new file mode 100644 (file)
index 0000000..2f9efce
--- /dev/null
@@ -0,0 +1,76 @@
+From be6e843fc51a584672dfd9c4a6a24c8cb81d5fb7 Mon Sep 17 00:00:00 2001
+From: Gavin Guo <gavinguo@igalia.com>
+Date: Mon, 21 Apr 2025 19:35:36 +0800
+Subject: mm/huge_memory: fix dereferencing invalid pmd migration entry
+
+From: Gavin Guo <gavinguo@igalia.com>
+
+commit be6e843fc51a584672dfd9c4a6a24c8cb81d5fb7 upstream.
+
+When migrating a THP, concurrent access to the PMD migration entry during
+a deferred split scan can lead to an invalid address access, as
+illustrated below.  To prevent this invalid access, it is necessary to
+check the PMD migration entry and return early.  In this context, there is
+no need to use pmd_to_swp_entry and pfn_swap_entry_to_page to verify the
+equality of the target folio.  Since the PMD migration entry is locked, it
+cannot be served as the target.
+
+Mailing list discussion and explanation from Hugh Dickins: "An anon_vma
+lookup points to a location which may contain the folio of interest, but
+might instead contain another folio: and weeding out those other folios is
+precisely what the "folio != pmd_folio((*pmd)" check (and the "risk of
+replacing the wrong folio" comment a few lines above it) is for."
+
+BUG: unable to handle page fault for address: ffffea60001db008
+CPU: 0 UID: 0 PID: 2199114 Comm: tee Not tainted 6.14.0+ #4 NONE
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
+RIP: 0010:split_huge_pmd_locked+0x3b5/0x2b60
+Call Trace:
+<TASK>
+try_to_migrate_one+0x28c/0x3730
+rmap_walk_anon+0x4f6/0x770
+unmap_folio+0x196/0x1f0
+split_huge_page_to_list_to_order+0x9f6/0x1560
+deferred_split_scan+0xac5/0x12a0
+shrinker_debugfs_scan_write+0x376/0x470
+full_proxy_write+0x15c/0x220
+vfs_write+0x2fc/0xcb0
+ksys_write+0x146/0x250
+do_syscall_64+0x6a/0x120
+entry_SYSCALL_64_after_hwframe+0x76/0x7e
+
+The bug is found by syzkaller on an internal kernel, then confirmed on
+upstream.
+
+Link: https://lkml.kernel.org/r/20250421113536.3682201-1-gavinguo@igalia.com
+Link: https://lore.kernel.org/all/20250414072737.1698513-1-gavinguo@igalia.com/
+Link: https://lore.kernel.org/all/20250418085802.2973519-1-gavinguo@igalia.com/
+Fixes: 84c3fc4e9c56 ("mm: thp: check pmd migration entry in common path")
+Signed-off-by: Gavin Guo <gavinguo@igalia.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Acked-by: Hugh Dickins <hughd@google.com>
+Acked-by: Zi Yan <ziy@nvidia.com>
+Reviewed-by: Gavin Shan <gshan@redhat.com>
+Cc: Florent Revest <revest@google.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Miaohe Lin <linmiaohe@huawei.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[gavin: backport the migration checking logic to __split_huge_pmd]
+Signed-off-by: Gavin Guo <gavinguo@igalia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/huge_memory.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -2161,7 +2161,7 @@ void __split_huge_pmd(struct vm_area_str
+       VM_BUG_ON(freeze && !page);
+       if (page) {
+               VM_WARN_ON_ONCE(!PageLocked(page));
+-              if (page != pmd_page(*pmd))
++              if (is_pmd_migration_entry(*pmd) || page != pmd_page(*pmd))
+                       goto out;
+       }
diff --git a/queue-5.15/net-fix-checksum-update-for-ila-adj-transport.patch b/queue-5.15/net-fix-checksum-update-for-ila-adj-transport.patch
new file mode 100644 (file)
index 0000000..e76ff72
--- /dev/null
@@ -0,0 +1,158 @@
+From 6043b794c7668c19dabc4a93c75b924a19474d59 Mon Sep 17 00:00:00 2001
+From: Paul Chaignon <paul.chaignon@gmail.com>
+Date: Thu, 29 May 2025 12:28:05 +0200
+Subject: net: Fix checksum update for ILA adj-transport
+
+From: Paul Chaignon <paul.chaignon@gmail.com>
+
+commit 6043b794c7668c19dabc4a93c75b924a19474d59 upstream.
+
+During ILA address translations, the L4 checksums can be handled in
+different ways. One of them, adj-transport, consists of parsing the
+transport layer and updating any checksum it finds. This logic relies on
+inet_proto_csum_replace_by_diff and produces an incorrect skb->csum when
+in state CHECKSUM_COMPLETE.
+
+This bug can be reproduced with a simple ILA to SIR mapping, assuming
+packets are received with CHECKSUM_COMPLETE:
+
+  $ ip a show dev eth0
+  14: eth0@if15: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
+      link/ether 62:ae:35:9e:0f:8d brd ff:ff:ff:ff:ff:ff link-netnsid 0
+      inet6 3333:0:0:1::c078/64 scope global
+         valid_lft forever preferred_lft forever
+      inet6 fd00:10:244:1::c078/128 scope global nodad
+         valid_lft forever preferred_lft forever
+      inet6 fe80::60ae:35ff:fe9e:f8d/64 scope link proto kernel_ll
+         valid_lft forever preferred_lft forever
+  $ ip ila add loc_match fd00:10:244:1 loc 3333:0:0:1 \
+      csum-mode adj-transport ident-type luid dev eth0
+
+Then I hit [fd00:10:244:1::c078]:8000 with a server listening only on
+[3333:0:0:1::c078]:8000. With the bug, the SYN packet is dropped with
+SKB_DROP_REASON_TCP_CSUM after inet_proto_csum_replace_by_diff changed
+skb->csum. The translation and drop are visible on pwru [1] traces:
+
+  IFACE   TUPLE                                                        FUNC
+  eth0:9  [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp)  ipv6_rcv
+  eth0:9  [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp)  ip6_rcv_core
+  eth0:9  [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp)  nf_hook_slow
+  eth0:9  [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp)  inet_proto_csum_replace_by_diff
+  eth0:9  [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp)     tcp_v6_early_demux
+  eth0:9  [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp)     ip6_route_input
+  eth0:9  [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp)     ip6_input
+  eth0:9  [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp)     ip6_input_finish
+  eth0:9  [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp)     ip6_protocol_deliver_rcu
+  eth0:9  [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp)     raw6_local_deliver
+  eth0:9  [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp)     ipv6_raw_deliver
+  eth0:9  [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp)     tcp_v6_rcv
+  eth0:9  [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp)     __skb_checksum_complete
+  eth0:9  [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp)     kfree_skb_reason(SKB_DROP_REASON_TCP_CSUM)
+  eth0:9  [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp)     skb_release_head_state
+  eth0:9  [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp)     skb_release_data
+  eth0:9  [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp)     skb_free_head
+  eth0:9  [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp)     kfree_skbmem
+
+This is happening because inet_proto_csum_replace_by_diff is updating
+skb->csum when it shouldn't. The L4 checksum is updated such that it
+"cancels" the IPv6 address change in terms of checksum computation, so
+the impact on skb->csum is null.
+
+Note this would be different for an IPv4 packet since three fields
+would be updated: the IPv4 address, the IP checksum, and the L4
+checksum. Two would cancel each other and skb->csum would still need
+to be updated to take the L4 checksum change into account.
+
+This patch fixes it by passing an ipv6 flag to
+inet_proto_csum_replace_by_diff, to skip the skb->csum update if we're
+in the IPv6 case. Note the behavior of the only other user of
+inet_proto_csum_replace_by_diff, the BPF subsystem, is left as is in
+this patch and fixed in the subsequent patch.
+
+With the fix, using the reproduction from above, I can confirm
+skb->csum is not touched by inet_proto_csum_replace_by_diff and the TCP
+SYN proceeds to the application after the ILA translation.
+
+Link: https://github.com/cilium/pwru [1]
+Fixes: 65d7ab8de582 ("net: Identifier Locator Addressing module")
+Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://patch.msgid.link/b5539869e3550d46068504feb02d37653d939c0b.1748509484.git.paul.chaignon@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[ Fixed conflict due to unrelated change in inet_proto_csum_replace_by_diff. ]
+Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/checksum.h    |    2 +-
+ net/core/filter.c         |    2 +-
+ net/core/utils.c          |    4 ++--
+ net/ipv6/ila/ila_common.c |    6 +++---
+ 4 files changed, 7 insertions(+), 7 deletions(-)
+
+--- a/include/net/checksum.h
++++ b/include/net/checksum.h
+@@ -154,7 +154,7 @@ void inet_proto_csum_replace16(__sum16 *
+                              const __be32 *from, const __be32 *to,
+                              bool pseudohdr);
+ void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
+-                                   __wsum diff, bool pseudohdr);
++                                   __wsum diff, bool pseudohdr, bool ipv6);
+ static __always_inline
+ void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -1970,7 +1970,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct s
+               if (unlikely(from != 0))
+                       return -EINVAL;
+-              inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
++              inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo, false);
+               break;
+       case 2:
+               inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
+--- a/net/core/utils.c
++++ b/net/core/utils.c
+@@ -473,11 +473,11 @@ void inet_proto_csum_replace16(__sum16 *
+ EXPORT_SYMBOL(inet_proto_csum_replace16);
+ void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
+-                                   __wsum diff, bool pseudohdr)
++                                   __wsum diff, bool pseudohdr, bool ipv6)
+ {
+       if (skb->ip_summed != CHECKSUM_PARTIAL) {
+               *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum)));
+-              if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
++              if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr && !ipv6)
+                       skb->csum = ~csum_add(diff, ~skb->csum);
+       } else if (pseudohdr) {
+               *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum)));
+--- a/net/ipv6/ila/ila_common.c
++++ b/net/ipv6/ila/ila_common.c
+@@ -86,7 +86,7 @@ static void ila_csum_adjust_transport(st
+                       diff = get_csum_diff(ip6h, p);
+                       inet_proto_csum_replace_by_diff(&th->check, skb,
+-                                                      diff, true);
++                                                      diff, true, true);
+               }
+               break;
+       case NEXTHDR_UDP:
+@@ -97,7 +97,7 @@ static void ila_csum_adjust_transport(st
+                       if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+                               diff = get_csum_diff(ip6h, p);
+                               inet_proto_csum_replace_by_diff(&uh->check, skb,
+-                                                              diff, true);
++                                                              diff, true, true);
+                               if (!uh->check)
+                                       uh->check = CSUM_MANGLED_0;
+                       }
+@@ -111,7 +111,7 @@ static void ila_csum_adjust_transport(st
+                       diff = get_csum_diff(ip6h, p);
+                       inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb,
+-                                                      diff, true);
++                                                      diff, true, true);
+               }
+               break;
+       }
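
The cancellation argument in the commit message above is easiest to see with a little one's-complement arithmetic. The toy userspace program below (purely illustrative, not kernel code; the packet layout is made up) folds a miniature "packet" the way CHECKSUM_COMPLETE does: rewriting an address word and patching the L4 checksum field by the complementary difference leaves the folded sum over the whole packet unchanged, which is why inet_proto_csum_replace_by_diff must leave skb->csum alone in the IPv6 case.

  /* Toy demo: the IPv6 adj-transport rewrite does not change the packet-wide sum. */
  #include <stdint.h>
  #include <stdio.h>

  /* 16-bit one's-complement sum, the arithmetic skb->csum is built on. */
  static uint16_t csum16(const uint16_t *w, int n)
  {
          uint32_t sum = 0;

          for (int i = 0; i < n; i++)
                  sum += w[i];
          while (sum >> 16)
                  sum = (sum & 0xffff) + (sum >> 16);
          return (uint16_t)sum;
  }

  int main(void)
  {
          /* word 0 stands for one 16-bit chunk of the IPv6 address,
           * word 3 for the TCP checksum field, the rest for payload. */
          uint16_t pkt[4] = { 0x1122, 0xabcd, 0x0f0f, 0x8001 };
          uint16_t before = csum16(pkt, 4);
          uint16_t old_addr = pkt[0], new_addr = 0x3333;
          uint32_t fix;

          pkt[0] = new_addr;                              /* ILA rewrites the address */
          fix = pkt[3] + old_addr + (uint16_t)~new_addr;  /* L4 csum -= address diff  */
          while (fix >> 16)
                  fix = (fix & 0xffff) + (fix >> 16);
          pkt[3] = (uint16_t)fix;

          printf("folded sum before=0x%04x after=0x%04x\n", before, csum16(pkt, 4));
          return 0;
  }

For IPv4 the same rewrite would also touch the IP header checksum, and the three changes no longer cancel, which is the asymmetry the new ipv6 argument encodes.
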
diff --git a/queue-5.15/net_sched-sch_sfq-annotate-data-races-around-q-perturb_period.patch b/queue-5.15/net_sched-sch_sfq-annotate-data-races-around-q-perturb_period.patch
new file mode 100644 (file)
index 0000000..a1f37f4
--- /dev/null
@@ -0,0 +1,67 @@
+From stable+bounces-155162-greg=kroah.com@vger.kernel.org Fri Jun 20 17:50:54 2025
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 20 Jun 2025 15:46:17 +0000
+Subject: net_sched: sch_sfq: annotate data-races around q->perturb_period
+To: stable@vger.kernel.org
+Cc: Eric Dumazet <edumazet@google.com>, Simon Horman <horms@kernel.org>,  Jakub Kicinski <kuba@kernel.org>
+Message-ID: <20250620154623.331294-1-edumazet@google.com>
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit a17ef9e6c2c1cf0fc6cd6ca6a9ce525c67d1da7f upstream.
+
+sfq_perturbation() reads q->perturb_period locklessly.
+Add annotations to fix potential issues.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20240430180015.3111398-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_sfq.c |   13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/net/sched/sch_sfq.c
++++ b/net/sched/sch_sfq.c
+@@ -611,6 +611,7 @@ static void sfq_perturbation(struct time
+       struct Qdisc *sch = q->sch;
+       spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+       siphash_key_t nkey;
++      int period;
+       get_random_bytes(&nkey, sizeof(nkey));
+       spin_lock(root_lock);
+@@ -619,8 +620,12 @@ static void sfq_perturbation(struct time
+               sfq_rehash(sch);
+       spin_unlock(root_lock);
+-      if (q->perturb_period)
+-              mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
++      /* q->perturb_period can change under us from
++       * sfq_change() and sfq_destroy().
++       */
++      period = READ_ONCE(q->perturb_period);
++      if (period)
++              mod_timer(&q->perturb_timer, jiffies + period);
+ }
+ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
+@@ -662,7 +667,7 @@ static int sfq_change(struct Qdisc *sch,
+               q->quantum = ctl->quantum;
+               q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
+       }
+-      q->perturb_period = ctl->perturb_period * HZ;
++      WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ);
+       if (ctl->flows)
+               q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
+       if (ctl->divisor) {
+@@ -724,7 +729,7 @@ static void sfq_destroy(struct Qdisc *sc
+       struct sfq_sched_data *q = qdisc_priv(sch);
+       tcf_block_put(q->block);
+-      q->perturb_period = 0;
++      WRITE_ONCE(q->perturb_period, 0);
+       del_timer_sync(&q->perturb_timer);
+       sfq_free(q->ht);
+       sfq_free(q->slots);
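
As a rough model of what these annotations buy, the userspace sketch below approximates READ_ONCE()/WRITE_ONCE() with volatile accesses (the kernel's real definitions do more) and shows the pattern the patch adopts: the timer callback snapshots q->perturb_period into a local exactly once and decides based on that snapshot, so a concurrent sfq_change() or sfq_destroy() cannot be observed half-way through the callback.

  /* Userspace approximation of the READ_ONCE/WRITE_ONCE pattern used above. */
  #include <stdio.h>

  #define READ_ONCE(x)      (*(const volatile __typeof__(x) *)&(x))
  #define WRITE_ONCE(x, v)  (*(volatile __typeof__(x) *)&(x) = (v))

  struct sfq_data { int perturb_period; };   /* stand-in for sfq_sched_data */

  /* Timer path: read the period once, then act only on the local copy. */
  static void perturbation(struct sfq_data *q)
  {
          int period = READ_ONCE(q->perturb_period);

          if (period)
                  printf("re-arm timer in %d jiffies\n", period);
          else
                  printf("timer not re-armed\n");
  }

  int main(void)
  {
          struct sfq_data q = { 0 };

          WRITE_ONCE(q.perturb_period, 10 * 250);  /* sfq_change()-style update */
          perturbation(&q);
          WRITE_ONCE(q.perturb_period, 0);         /* sfq_destroy()-style reset */
          perturbation(&q);
          return 0;
  }
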
diff --git a/queue-5.15/net_sched-sch_sfq-don-t-allow-1-packet-limit.patch b/queue-5.15/net_sched-sch_sfq-don-t-allow-1-packet-limit.patch
new file mode 100644 (file)
index 0000000..fe83d93
--- /dev/null
@@ -0,0 +1,114 @@
+From stable+bounces-155164-greg=kroah.com@vger.kernel.org Fri Jun 20 17:50:57 2025
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 20 Jun 2025 15:46:19 +0000
+Subject: net_sched: sch_sfq: don't allow 1 packet limit
+To: stable@vger.kernel.org
+Cc: Octavian Purdila <tavip@google.com>, syzbot <syzkaller@googlegroups.com>,  Eric Dumazet <edumazet@google.com>, Jakub Kicinski <kuba@kernel.org>
+Message-ID: <20250620154623.331294-3-edumazet@google.com>
+
+From: Octavian Purdila <tavip@google.com>
+
+commit 10685681bafce6febb39770f3387621bf5d67d0b upstream.
+
+The current implementation does not work correctly with a limit of
+1. iproute2 actually checks for this and this patch adds the check in
+kernel as well.
+
+This fixes the following syzkaller reported crash:
+
+UBSAN: array-index-out-of-bounds in net/sched/sch_sfq.c:210:6
+index 65535 is out of range for type 'struct sfq_head[128]'
+CPU: 0 PID: 2569 Comm: syz-executor101 Not tainted 5.10.0-smp-DEV #1
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024
+Call Trace:
+  __dump_stack lib/dump_stack.c:79 [inline]
+  dump_stack+0x125/0x19f lib/dump_stack.c:120
+  ubsan_epilogue lib/ubsan.c:148 [inline]
+  __ubsan_handle_out_of_bounds+0xed/0x120 lib/ubsan.c:347
+  sfq_link net/sched/sch_sfq.c:210 [inline]
+  sfq_dec+0x528/0x600 net/sched/sch_sfq.c:238
+  sfq_dequeue+0x39b/0x9d0 net/sched/sch_sfq.c:500
+  sfq_reset+0x13/0x50 net/sched/sch_sfq.c:525
+  qdisc_reset+0xfe/0x510 net/sched/sch_generic.c:1026
+  tbf_reset+0x3d/0x100 net/sched/sch_tbf.c:319
+  qdisc_reset+0xfe/0x510 net/sched/sch_generic.c:1026
+  dev_reset_queue+0x8c/0x140 net/sched/sch_generic.c:1296
+  netdev_for_each_tx_queue include/linux/netdevice.h:2350 [inline]
+  dev_deactivate_many+0x6dc/0xc20 net/sched/sch_generic.c:1362
+  __dev_close_many+0x214/0x350 net/core/dev.c:1468
+  dev_close_many+0x207/0x510 net/core/dev.c:1506
+  unregister_netdevice_many+0x40f/0x16b0 net/core/dev.c:10738
+  unregister_netdevice_queue+0x2be/0x310 net/core/dev.c:10695
+  unregister_netdevice include/linux/netdevice.h:2893 [inline]
+  __tun_detach+0x6b6/0x1600 drivers/net/tun.c:689
+  tun_detach drivers/net/tun.c:705 [inline]
+  tun_chr_close+0x104/0x1b0 drivers/net/tun.c:3640
+  __fput+0x203/0x840 fs/file_table.c:280
+  task_work_run+0x129/0x1b0 kernel/task_work.c:185
+  exit_task_work include/linux/task_work.h:33 [inline]
+  do_exit+0x5ce/0x2200 kernel/exit.c:931
+  do_group_exit+0x144/0x310 kernel/exit.c:1046
+  __do_sys_exit_group kernel/exit.c:1057 [inline]
+  __se_sys_exit_group kernel/exit.c:1055 [inline]
+  __x64_sys_exit_group+0x3b/0x40 kernel/exit.c:1055
+ do_syscall_64+0x6c/0xd0
+ entry_SYSCALL_64_after_hwframe+0x61/0xcb
+RIP: 0033:0x7fe5e7b52479
+Code: Unable to access opcode bytes at RIP 0x7fe5e7b5244f.
+RSP: 002b:00007ffd3c800398 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7
+RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fe5e7b52479
+RDX: 000000000000003c RSI: 00000000000000e7 RDI: 0000000000000000
+RBP: 00007fe5e7bcd2d0 R08: ffffffffffffffb8 R09: 0000000000000014
+R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe5e7bcd2d0
+R13: 0000000000000000 R14: 00007fe5e7bcdd20 R15: 00007fe5e7b24270
+
+The crash can be also be reproduced with the following (with a tc
+recompiled to allow for sfq limits of 1):
+
+tc qdisc add dev dummy0 handle 1: root tbf rate 1Kbit burst 100b lat 1s
+../iproute2-6.9.0/tc/tc qdisc add dev dummy0 handle 2: parent 1:10 sfq limit 1
+ifconfig dummy0 up
+ping -I dummy0 -f -c2 -W0.1 8.8.8.8
+sleep 1
+
+Scenario that triggers the crash:
+
+* the first packet is sent and queued in TBF and SFQ; qdisc qlen is 1
+
+* TBF dequeues: it peeks from SFQ which moves the packet to the
+  gso_skb list and keeps qdisc qlen set to 1. TBF is out of tokens so
+  it schedules itself for later.
+
+* the second packet is sent and TBF tries to queue it to SFQ. qdisc
+  qlen is now 2 and because the SFQ limit is 1 the packet is dropped
+  by SFQ. At this point qlen is 1, and all of the SFQ slots are empty,
+  however q->tail is not NULL.
+
+At this point, assuming no more packets are queued, when sch_dequeue
+runs again it will decrement the qlen for the current empty slot
+causing an underflow and the subsequent out of bounds access.
+
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Octavian Purdila <tavip@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20241204030520.2084663-2-tavip@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_sfq.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/sched/sch_sfq.c
++++ b/net/sched/sch_sfq.c
+@@ -652,6 +652,10 @@ static int sfq_change(struct Qdisc *sch,
+               if (!p)
+                       return -ENOMEM;
+       }
++      if (ctl->limit == 1) {
++              NL_SET_ERR_MSG_MOD(extack, "invalid limit");
++              return -EINVAL;
++      }
+       sch_tree_lock(sch);
+       if (ctl->quantum)
+               q->quantum = ctl->quantum;
diff --git a/queue-5.15/net_sched-sch_sfq-handle-bigger-packets.patch b/queue-5.15/net_sched-sch_sfq-handle-bigger-packets.patch
new file mode 100644 (file)
index 0000000..2caa082
--- /dev/null
@@ -0,0 +1,161 @@
+From stable+bounces-155163-greg=kroah.com@vger.kernel.org Fri Jun 20 17:50:01 2025
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 20 Jun 2025 15:46:18 +0000
+Subject: net_sched: sch_sfq: handle bigger packets
+To: stable@vger.kernel.org
+Cc: "Eric Dumazet" <edumazet@google.com>, "Toke Høiland-Jørgensen" <toke@redhat.com>, "Jakub Kicinski" <kuba@kernel.org>
+Message-ID: <20250620154623.331294-2-edumazet@google.com>
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit e4650d7ae4252f67e997a632adfae0dd74d3a99a upstream.
+
+SFQ has an assumption on dealing with packets smaller than 64KB.
+
+Even before BIG TCP, TCA_STAB can provide arbitrary big values
+in qdisc_pkt_len(skb)
+
+It is time to switch (struct sfq_slot)->allot to a 32bit field.
+
+sizeof(struct sfq_slot) is now 64 bytes, giving better cache locality.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Link: https://patch.msgid.link/20241008111603.653140-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_sfq.c |   39 +++++++++++++--------------------------
+ 1 file changed, 13 insertions(+), 26 deletions(-)
+
+--- a/net/sched/sch_sfq.c
++++ b/net/sched/sch_sfq.c
+@@ -77,12 +77,6 @@
+ #define SFQ_EMPTY_SLOT                0xffff
+ #define SFQ_DEFAULT_HASH_DIVISOR 1024
+-/* We use 16 bits to store allot, and want to handle packets up to 64K
+- * Scale allot by 8 (1<<3) so that no overflow occurs.
+- */
+-#define SFQ_ALLOT_SHIFT               3
+-#define SFQ_ALLOT_SIZE(X)     DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
+-
+ /* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
+ typedef u16 sfq_index;
+@@ -104,7 +98,7 @@ struct sfq_slot {
+       sfq_index       next; /* next slot in sfq RR chain */
+       struct sfq_head dep; /* anchor in dep[] chains */
+       unsigned short  hash; /* hash value (index in ht[]) */
+-      short           allot; /* credit for this slot */
++      int             allot; /* credit for this slot */
+       unsigned int    backlog;
+       struct red_vars vars;
+@@ -120,7 +114,6 @@ struct sfq_sched_data {
+       siphash_key_t   perturbation;
+       u8              cur_depth;      /* depth of longest slot */
+       u8              flags;
+-      unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
+       struct tcf_proto __rcu *filter_list;
+       struct tcf_block *block;
+       sfq_index       *ht;            /* Hash table ('divisor' slots) */
+@@ -459,7 +452,7 @@ enqueue:
+                */
+               q->tail = slot;
+               /* We could use a bigger initial quantum for new flows */
+-              slot->allot = q->scaled_quantum;
++              slot->allot = q->quantum;
+       }
+       if (++sch->q.qlen <= q->limit)
+               return NET_XMIT_SUCCESS;
+@@ -496,7 +489,7 @@ next_slot:
+       slot = &q->slots[a];
+       if (slot->allot <= 0) {
+               q->tail = slot;
+-              slot->allot += q->scaled_quantum;
++              slot->allot += q->quantum;
+               goto next_slot;
+       }
+       skb = slot_dequeue_head(slot);
+@@ -515,7 +508,7 @@ next_slot:
+               }
+               q->tail->next = next_a;
+       } else {
+-              slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
++              slot->allot -= qdisc_pkt_len(skb);
+       }
+       return skb;
+ }
+@@ -598,7 +591,7 @@ drop:
+                               q->tail->next = x;
+                       }
+                       q->tail = slot;
+-                      slot->allot = q->scaled_quantum;
++                      slot->allot = q->quantum;
+               }
+       }
+       sch->q.qlen -= dropped;
+@@ -628,7 +621,8 @@ static void sfq_perturbation(struct time
+               mod_timer(&q->perturb_timer, jiffies + period);
+ }
+-static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
++static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
++                    struct netlink_ext_ack *extack)
+ {
+       struct sfq_sched_data *q = qdisc_priv(sch);
+       struct tc_sfq_qopt *ctl = nla_data(opt);
+@@ -646,14 +640,10 @@ static int sfq_change(struct Qdisc *sch,
+           (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
+               return -EINVAL;
+-      /* slot->allot is a short, make sure quantum is not too big. */
+-      if (ctl->quantum) {
+-              unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum);
+-
+-              if (scaled <= 0 || scaled > SHRT_MAX)
+-                      return -EINVAL;
++      if ((int)ctl->quantum < 0) {
++              NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
++              return -EINVAL;
+       }
+-
+       if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
+                                       ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
+               return -EINVAL;
+@@ -663,10 +653,8 @@ static int sfq_change(struct Qdisc *sch,
+                       return -ENOMEM;
+       }
+       sch_tree_lock(sch);
+-      if (ctl->quantum) {
++      if (ctl->quantum)
+               q->quantum = ctl->quantum;
+-              q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
+-      }
+       WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ);
+       if (ctl->flows)
+               q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
+@@ -762,12 +750,11 @@ static int sfq_init(struct Qdisc *sch, s
+       q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
+       q->maxflows = SFQ_DEFAULT_FLOWS;
+       q->quantum = psched_mtu(qdisc_dev(sch));
+-      q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
+       q->perturb_period = 0;
+       get_random_bytes(&q->perturbation, sizeof(q->perturbation));
+       if (opt) {
+-              int err = sfq_change(sch, opt);
++              int err = sfq_change(sch, opt, extack);
+               if (err)
+                       return err;
+       }
+@@ -878,7 +865,7 @@ static int sfq_dump_class_stats(struct Q
+       if (idx != SFQ_EMPTY_SLOT) {
+               const struct sfq_slot *slot = &q->slots[idx];
+-              xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
++              xstats.allot = slot->allot;
+               qs.qlen = slot->qlen;
+               qs.backlog = slot->backlog;
+       }
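
To make the "smaller than 64KB" assumption concrete: with the old 16-bit allot field and the 1 << 3 scaling, a single qdisc_pkt_len() above roughly 256KB (SHRT_MAX * 8, reachable once TCA_STAB or BIG TCP inflate the length) already yields a scaled credit that no longer fits in a short, so the per-slot credit wraps on typical toolchains. The illustrative program below (not kernel code) contrasts that with the unscaled 32-bit bookkeeping the patch switches to.

  /* Illustration: the old scaled 16-bit credit wraps for very large packets. */
  #include <stdio.h>

  #define SFQ_ALLOT_SHIFT   3
  #define SFQ_ALLOT_SIZE(X) (((X) + (1 << SFQ_ALLOT_SHIFT) - 1) >> SFQ_ALLOT_SHIFT)

  int main(void)
  {
          int   pkt_len   = 300000;  /* > 64K, e.g. via TCA_STAB or BIG TCP     */
          short old_allot = 1514;    /* old field: short, credit scaled by 1<<3 */
          int   new_allot = 1514;    /* new field: int, unscaled                */

          old_allot -= SFQ_ALLOT_SIZE(pkt_len);  /* 37500 does not fit a short  */
          new_allot -= pkt_len;

          printf("old 16-bit credit: %d (wrapped)\n", old_allot);
          printf("new 32-bit credit: %d\n", new_allot);
          return 0;
  }
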
diff --git a/queue-5.15/net_sched-sch_sfq-move-the-limit-validation.patch b/queue-5.15/net_sched-sch_sfq-move-the-limit-validation.patch
new file mode 100644 (file)
index 0000000..41c5601
--- /dev/null
@@ -0,0 +1,83 @@
+From stable+bounces-155166-greg=kroah.com@vger.kernel.org Fri Jun 20 17:53:20 2025
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 20 Jun 2025 15:46:21 +0000
+Subject: net_sched: sch_sfq: move the limit validation
+To: stable@vger.kernel.org
+Cc: Octavian Purdila <tavip@google.com>, syzbot <syzkaller@googlegroups.com>,  Cong Wang <xiyou.wangcong@gmail.com>, "David S. Miller" <davem@davemloft.net>
+Message-ID: <20250620154623.331294-5-edumazet@google.com>
+
+From: Octavian Purdila <tavip@google.com>
+
+commit b3bf8f63e6179076b57c9de660c9f80b5abefe70 upstream.
+
+It is not sufficient to directly validate the limit on the data that
+the user passes as it can be updated based on how the other parameters
+are changed.
+
+Move the check at the end of the configuration update process to also
+catch scenarios where the limit is indirectly updated, for example
+with the following configurations:
+
+tc qdisc add dev dummy0 handle 1: root sfq limit 2 flows 1 depth 1
+tc qdisc add dev dummy0 handle 1: root sfq limit 2 flows 1 divisor 1
+
+This fixes the following syzkaller reported crash:
+
+------------[ cut here ]------------
+UBSAN: array-index-out-of-bounds in net/sched/sch_sfq.c:203:6
+index 65535 is out of range for type 'struct sfq_head[128]'
+CPU: 1 UID: 0 PID: 3037 Comm: syz.2.16 Not tainted 6.14.0-rc2-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 12/27/2024
+Call Trace:
+ <TASK>
+ __dump_stack lib/dump_stack.c:94 [inline]
+ dump_stack_lvl+0x201/0x300 lib/dump_stack.c:120
+ ubsan_epilogue lib/ubsan.c:231 [inline]
+ __ubsan_handle_out_of_bounds+0xf5/0x120 lib/ubsan.c:429
+ sfq_link net/sched/sch_sfq.c:203 [inline]
+ sfq_dec+0x53c/0x610 net/sched/sch_sfq.c:231
+ sfq_dequeue+0x34e/0x8c0 net/sched/sch_sfq.c:493
+ sfq_reset+0x17/0x60 net/sched/sch_sfq.c:518
+ qdisc_reset+0x12e/0x600 net/sched/sch_generic.c:1035
+ tbf_reset+0x41/0x110 net/sched/sch_tbf.c:339
+ qdisc_reset+0x12e/0x600 net/sched/sch_generic.c:1035
+ dev_reset_queue+0x100/0x1b0 net/sched/sch_generic.c:1311
+ netdev_for_each_tx_queue include/linux/netdevice.h:2590 [inline]
+ dev_deactivate_many+0x7e5/0xe70 net/sched/sch_generic.c:1375
+
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Fixes: 10685681bafc ("net_sched: sch_sfq: don't allow 1 packet limit")
+Signed-off-by: Octavian Purdila <tavip@google.com>
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_sfq.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/net/sched/sch_sfq.c
++++ b/net/sched/sch_sfq.c
+@@ -661,10 +661,6 @@ static int sfq_change(struct Qdisc *sch,
+               if (!p)
+                       return -ENOMEM;
+       }
+-      if (ctl->limit == 1) {
+-              NL_SET_ERR_MSG_MOD(extack, "invalid limit");
+-              return -EINVAL;
+-      }
+       sch_tree_lock(sch);
+@@ -705,6 +701,12 @@ static int sfq_change(struct Qdisc *sch,
+               limit = min_t(u32, ctl->limit, maxdepth * maxflows);
+               maxflows = min_t(u32, maxflows, limit);
+       }
++      if (limit == 1) {
++              sch_tree_unlock(sch);
++              kfree(p);
++              NL_SET_ERR_MSG_MOD(extack, "invalid limit");
++              return -EINVAL;
++      }
+       /* commit configuration */
+       q->limit = limit;
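
The "indirectly updated" limit the commit message describes is plain min() arithmetic, mirroring the q->limit computation in sfq_change(). The snippet below (illustrative only) shows how a user-supplied limit of 2 combined with depth 1 and flows 1 collapses to the forbidden value 1; that value only exists after the recomputation, which is why the check has to run at the end.

  /* How the effective SFQ limit is derived from the requested parameters. */
  #include <stdio.h>

  static unsigned int min_u32(unsigned int a, unsigned int b)
  {
          return a < b ? a : b;
  }

  int main(void)
  {
          /* tc qdisc add dev dummy0 handle 1: root sfq limit 2 flows 1 depth 1 */
          unsigned int ctl_limit = 2, maxdepth = 1, maxflows = 1;
          unsigned int limit = min_u32(ctl_limit, maxdepth * maxflows);

          maxflows = min_u32(maxflows, limit);
          printf("effective limit = %u (a result of 1 must be rejected)\n", limit);
          return 0;
  }
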
diff --git a/queue-5.15/net_sched-sch_sfq-reject-invalid-perturb-period.patch b/queue-5.15/net_sched-sch_sfq-reject-invalid-perturb-period.patch
new file mode 100644 (file)
index 0000000..927bb43
--- /dev/null
@@ -0,0 +1,75 @@
+From stable+bounces-155168-greg=kroah.com@vger.kernel.org Fri Jun 20 17:53:24 2025
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 20 Jun 2025 15:46:23 +0000
+Subject: net_sched: sch_sfq: reject invalid perturb period
+To: stable@vger.kernel.org
+Cc: Eric Dumazet <edumazet@google.com>, Gerrard Tai <gerrard.tai@starlabs.sg>,  Jakub Kicinski <kuba@kernel.org>
+Message-ID: <20250620154623.331294-7-edumazet@google.com>
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 7ca52541c05c832d32b112274f81a985101f9ba8 upstream.
+
+Gerrard Tai reported that SFQ perturb_period has no range check yet,
+and this can be used to trigger a race condition fixed in a separate patch.
+
+We want to make sure ctl->perturb_period * HZ will not overflow
+and is positive.
+
+Tested:
+
+tc qd add dev lo root sfq perturb -10   # negative value : error
+Error: sch_sfq: invalid perturb period.
+
+tc qd add dev lo root sfq perturb 1000000000 # too big : error
+Error: sch_sfq: invalid perturb period.
+
+tc qd add dev lo root sfq perturb 2000000 # acceptable value
+tc -s -d qd sh dev lo
+qdisc sfq 8005: root refcnt 2 limit 127p quantum 64Kb depth 127 flows 128 divisor 1024 perturb 2000000sec
+ Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
+ backlog 0b 0p requeues 0
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: Gerrard Tai <gerrard.tai@starlabs.sg>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: stable@vger.kernel.org
+Link: https://patch.msgid.link/20250611083501.1810459-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_sfq.c |   10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/net/sched/sch_sfq.c
++++ b/net/sched/sch_sfq.c
+@@ -653,6 +653,14 @@ static int sfq_change(struct Qdisc *sch,
+               NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
+               return -EINVAL;
+       }
++
++      if (ctl->perturb_period < 0 ||
++          ctl->perturb_period > INT_MAX / HZ) {
++              NL_SET_ERR_MSG_MOD(extack, "invalid perturb period");
++              return -EINVAL;
++      }
++      perturb_period = ctl->perturb_period * HZ;
++
+       if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
+                                       ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
+               return -EINVAL;
+@@ -669,14 +677,12 @@ static int sfq_change(struct Qdisc *sch,
+       headdrop = q->headdrop;
+       maxdepth = q->maxdepth;
+       maxflows = q->maxflows;
+-      perturb_period = q->perturb_period;
+       quantum = q->quantum;
+       flags = q->flags;
+       /* update and validate configuration */
+       if (ctl->quantum)
+               quantum = ctl->quantum;
+-      perturb_period = ctl->perturb_period * HZ;
+       if (ctl->flows)
+               maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
+       if (ctl->divisor) {
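
The range being enforced is simply "stays a positive int once multiplied by HZ". A minimal userspace sketch of that guard, assuming HZ is 1000 (the real value depends on the kernel configuration), reproduces the three cases from the test log in the commit message:

  /* Sketch of the perturb_period range check; the HZ value is an assumption. */
  #include <limits.h>
  #include <stdio.h>

  #define HZ 1000

  static int perturb_period_valid(int seconds)
  {
          return seconds >= 0 && seconds <= INT_MAX / HZ;
  }

  int main(void)
  {
          printf("perturb -10        -> %s\n", perturb_period_valid(-10) ? "ok" : "rejected");
          printf("perturb 1000000000 -> %s\n", perturb_period_valid(1000000000) ? "ok" : "rejected");
          printf("perturb 2000000    -> %s\n", perturb_period_valid(2000000) ? "ok" : "rejected");
          return 0;
  }
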
diff --git a/queue-5.15/net_sched-sch_sfq-use-a-temporary-work-area-for-validating-configuration.patch b/queue-5.15/net_sched-sch_sfq-use-a-temporary-work-area-for-validating-configuration.patch
new file mode 100644 (file)
index 0000000..2897142
--- /dev/null
@@ -0,0 +1,120 @@
+From stable+bounces-155165-greg=kroah.com@vger.kernel.org Fri Jun 20 17:53:18 2025
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 20 Jun 2025 15:46:20 +0000
+Subject: net_sched: sch_sfq: use a temporary work area for validating configuration
+To: stable@vger.kernel.org
+Cc: Octavian Purdila <tavip@google.com>, Cong Wang <xiyou.wangcong@gmail.com>,  "David S. Miller" <davem@davemloft.net>
+Message-ID: <20250620154623.331294-4-edumazet@google.com>
+
+From: Octavian Purdila <tavip@google.com>
+
+commit 8c0cea59d40cf6dd13c2950437631dd614fbade6 upstream.
+
+Many configuration parameters have influence on others (e.g. divisor
+-> flows -> limit, depth -> limit) and so it is difficult to correctly
+do all of the validation before applying the configuration. And if a
+validation error is detected late it is difficult to roll back a
+partially applied configuration.
+
+To avoid these issues use a temporary work area to update and validate
+the configuration and only then apply the configuration to the
+internal state.
+
+Signed-off-by: Octavian Purdila <tavip@google.com>
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_sfq.c |   56 ++++++++++++++++++++++++++++++++++++++++------------
+ 1 file changed, 44 insertions(+), 12 deletions(-)
+
+--- a/net/sched/sch_sfq.c
++++ b/net/sched/sch_sfq.c
+@@ -631,6 +631,15 @@ static int sfq_change(struct Qdisc *sch,
+       struct red_parms *p = NULL;
+       struct sk_buff *to_free = NULL;
+       struct sk_buff *tail = NULL;
++      unsigned int maxflows;
++      unsigned int quantum;
++      unsigned int divisor;
++      int perturb_period;
++      u8 headdrop;
++      u8 maxdepth;
++      int limit;
++      u8 flags;
++
+       if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
+               return -EINVAL;
+@@ -656,36 +665,59 @@ static int sfq_change(struct Qdisc *sch,
+               NL_SET_ERR_MSG_MOD(extack, "invalid limit");
+               return -EINVAL;
+       }
++
+       sch_tree_lock(sch);
++
++      limit = q->limit;
++      divisor = q->divisor;
++      headdrop = q->headdrop;
++      maxdepth = q->maxdepth;
++      maxflows = q->maxflows;
++      perturb_period = q->perturb_period;
++      quantum = q->quantum;
++      flags = q->flags;
++
++      /* update and validate configuration */
+       if (ctl->quantum)
+-              q->quantum = ctl->quantum;
+-      WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ);
++              quantum = ctl->quantum;
++      perturb_period = ctl->perturb_period * HZ;
+       if (ctl->flows)
+-              q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
++              maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
+       if (ctl->divisor) {
+-              q->divisor = ctl->divisor;
+-              q->maxflows = min_t(u32, q->maxflows, q->divisor);
++              divisor = ctl->divisor;
++              maxflows = min_t(u32, maxflows, divisor);
+       }
+       if (ctl_v1) {
+               if (ctl_v1->depth)
+-                      q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
++                      maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
+               if (p) {
+-                      swap(q->red_parms, p);
+-                      red_set_parms(q->red_parms,
++                      red_set_parms(p,
+                                     ctl_v1->qth_min, ctl_v1->qth_max,
+                                     ctl_v1->Wlog,
+                                     ctl_v1->Plog, ctl_v1->Scell_log,
+                                     NULL,
+                                     ctl_v1->max_P);
+               }
+-              q->flags = ctl_v1->flags;
+-              q->headdrop = ctl_v1->headdrop;
++              flags = ctl_v1->flags;
++              headdrop = ctl_v1->headdrop;
+       }
+       if (ctl->limit) {
+-              q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
+-              q->maxflows = min_t(u32, q->maxflows, q->limit);
++              limit = min_t(u32, ctl->limit, maxdepth * maxflows);
++              maxflows = min_t(u32, maxflows, limit);
+       }
++      /* commit configuration */
++      q->limit = limit;
++      q->divisor = divisor;
++      q->headdrop = headdrop;
++      q->maxdepth = maxdepth;
++      q->maxflows = maxflows;
++      WRITE_ONCE(q->perturb_period, perturb_period);
++      q->quantum = quantum;
++      q->flags = flags;
++      if (p)
++              swap(q->red_parms, p);
++
+       qlen = sch->q.qlen;
+       while (sch->q.qlen > q->limit) {
+               dropped += sfq_drop(sch, &to_free);
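
The shape of this change is a common configuration-update pattern: snapshot the live settings into locals, fold the requested changes and any validation into the snapshot, and copy the snapshot back only once everything has been accepted, which is also what makes the late limit check in the previous patch possible. A condensed, illustrative sketch of that flow (field names shortened, locking and the RED parameters omitted):

  /* Condensed illustration of the snapshot -> validate -> commit pattern. */
  #include <stdio.h>

  struct cfg { unsigned int limit, divisor, maxflows; };

  static int change_cfg(struct cfg *live, const struct cfg *req)
  {
          struct cfg work = *live;                /* temporary work area          */

          if (req->divisor) {
                  work.divisor  = req->divisor;
                  work.maxflows = work.maxflows < work.divisor ?
                                  work.maxflows : work.divisor;
          }
          if (req->limit)
                  work.limit = req->limit;

          if (work.limit == 1)                    /* validate the derived state   */
                  return -1;

          *live = work;                           /* commit only after validation */
          return 0;
  }

  int main(void)
  {
          struct cfg live = { .limit = 127, .divisor = 1024, .maxflows = 128 };
          struct cfg req  = { .limit = 1, .divisor = 0, .maxflows = 0 };

          if (change_cfg(&live, &req))
                  printf("rejected; live limit is still %u\n", live.limit);
          return 0;
  }
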
diff --git a/queue-5.15/series b/queue-5.15/series
index ada521d4ba8cb412692ef52c14d437620f38ce81..fd9b42ca3a4847bc0cf36c37f79621c2924c1cdf 100644 (file)
@@ -395,4 +395,14 @@ arm64-spectre-increase-parameters-that-can-be-used-to-turn-off-bhb-mitigation-in
 arm64-bpf-add-bhb-mitigation-to-the-epilogue-for-cbpf-programs.patch
 arm64-bpf-only-mitigate-cbpf-programs-loaded-by-unprivileged-users.patch
 arm64-proton-pack-add-new-cpus-k-values-for-branch-mitigation.patch
+net_sched-sch_sfq-annotate-data-races-around-q-perturb_period.patch
+net_sched-sch_sfq-handle-bigger-packets.patch
+net_sched-sch_sfq-don-t-allow-1-packet-limit.patch
+net_sched-sch_sfq-use-a-temporary-work-area-for-validating-configuration.patch
+net_sched-sch_sfq-move-the-limit-validation.patch
+net_sched-sch_sfq-reject-invalid-perturb-period.patch
+mm-huge_memory-fix-dereferencing-invalid-pmd-migration-entry.patch
+ext4-make-abort-mount-option-handling-standard.patch
+ext4-avoid-remount-errors-with-abort-mount-option.patch
+net-fix-checksum-update-for-ila-adj-transport.patch
 bpf-fix-l4-csum-update-on-ipv6-in-checksum_complete.patch